From d6cbf3366e63e4e4333e49377ee994d26b266a6c Mon Sep 17 00:00:00 2001 From: Tim Angus Date: Fri, 24 Aug 2007 00:32:53 +0000 Subject: * Merged ioq3-r1133 + PNG image loader + Non-gas dependent x86_64 VM + Collision optimisations + Slew of other bug fixes --- Makefile | 5 +- src/client/cl_keys.c | 90 +- src/null/null_client.c | 2 +- src/qcommon/cm_patch.c | 7 +- src/qcommon/cm_test.c | 4 + src/qcommon/cm_trace.c | 5 + src/qcommon/common.c | 2 +- src/qcommon/files.c | 28 +- src/qcommon/msg.c | 4 +- src/qcommon/puff.c | 758 +++++++++++ src/qcommon/puff.h | 43 + src/qcommon/q_math.c | 47 + src/qcommon/q_shared.h | 9 +- src/qcommon/vm_x86.c | 2 +- src/qcommon/vm_x86_64.c | 152 ++- src/qcommon/vm_x86_64_assembler.c | 1419 ++++++++++++++++++++ src/renderer/tr_image.c | 2618 +++++++++++++++++++++++++++++++++++-- src/renderer/tr_init.c | 2 +- src/renderer/tr_shader.c | 86 +- src/unix/linux_signals.c | 2 +- 20 files changed, 5046 insertions(+), 239 deletions(-) create mode 100644 src/qcommon/puff.c create mode 100644 src/qcommon/puff.h create mode 100644 src/qcommon/vm_x86_64_assembler.c diff --git a/Makefile b/Makefile index 2f3f339c..37ec7fe1 100644 --- a/Makefile +++ b/Makefile @@ -924,6 +924,7 @@ Q3OBJ = \ $(B)/client/q_shared.o \ \ $(B)/client/unzip.o \ + $(B)/client/puff.o \ $(B)/client/vm.o \ $(B)/client/vm_interpreted.o \ \ @@ -999,7 +1000,7 @@ ifeq ($(HAVE_VM_COMPILED),true) Q3OBJ += $(B)/client/vm_x86.o endif ifeq ($(ARCH),x86_64) - Q3OBJ += $(B)/client/vm_x86_64.o + Q3OBJ += $(B)/client/vm_x86_64.o $(B)/client/vm_x86_64_assembler.o endif ifeq ($(ARCH),ppc) Q3OBJ += $(B)/client/vm_ppc.o @@ -1125,7 +1126,7 @@ ifeq ($(HAVE_VM_COMPILED),true) Q3DOBJ += $(B)/ded/vm_x86.o endif ifeq ($(ARCH),x86_64) - Q3DOBJ += $(B)/ded/vm_x86_64.o + Q3DOBJ += $(B)/ded/vm_x86_64.o $(B)/client/vm_x86_64_assembler.o endif ifeq ($(ARCH),ppc) Q3DOBJ += $(B)/ded/vm_ppc.o diff --git a/src/client/cl_keys.c b/src/client/cl_keys.c index b283ffbc..950cb5b1 100644 --- a/src/client/cl_keys.c +++ 
b/src/client/cl_keys.c @@ -307,7 +307,7 @@ EDIT FIELDS Field_Draw Handles horizontal scrolling and cursor blinking -x, y, amd width are in pixels +x, y, and width are in pixels =================== */ void Field_VariableSizeDraw( field_t *edit, int x, int y, int width, int size, qboolean showCursor ) { @@ -318,8 +318,8 @@ void Field_VariableSizeDraw( field_t *edit, int x, int y, int width, int size, q char str[MAX_STRING_CHARS]; int i; - drawLen = edit->widthInChars; - len = strlen( edit->buffer ) + 1; + drawLen = edit->widthInChars - 1; // - 1 so there is always a space for the cursor + len = strlen( edit->buffer ); // guarantee that cursor will be visible if ( len <= drawLen ) { @@ -332,14 +332,6 @@ void Field_VariableSizeDraw( field_t *edit, int x, int y, int width, int size, q } } prestep = edit->scroll; - -/* - if ( edit->cursor < len - drawLen ) { - prestep = edit->cursor; // cursor at start - } else { - prestep = len - drawLen; - } -*/ } if ( prestep + drawLen > len ) { @@ -380,7 +372,7 @@ void Field_VariableSizeDraw( field_t *edit, int x, int y, int width, int size, q cursorChar = 10; } - i = drawLen - ( Q_PrintStrlen( str ) + 1 ); + i = drawLen - Q_PrintStrlen( str ); if ( size == SMALLCHAR_WIDTH ) { SCR_DrawSmallChar( x + ( edit->cursor - prestep - i ) * size, y, cursorChar ); @@ -445,54 +437,50 @@ void Field_KeyDownEvent( field_t *edit, int key ) { return; } + key = tolower( key ); len = strlen( edit->buffer ); - if ( key == K_DEL ) { - if ( edit->cursor < len ) { - memmove( edit->buffer + edit->cursor, - edit->buffer + edit->cursor + 1, len - edit->cursor ); - } - return; - } + switch ( key ) { + case K_DEL: + if ( edit->cursor < len ) { + memmove( edit->buffer + edit->cursor, + edit->buffer + edit->cursor + 1, len - edit->cursor ); + } + break; - if ( key == K_RIGHTARROW ) - { - if ( edit->cursor < len ) { - edit->cursor++; - } + case K_RIGHTARROW: + if ( edit->cursor < len ) { + edit->cursor++; + } + break; - if ( edit->cursor >= edit->scroll + 
edit->widthInChars && edit->cursor <= len ) - { - edit->scroll++; - } - return; - } + case K_LEFTARROW: + if ( edit->cursor > 0 ) { + edit->cursor--; + } + break; - if ( key == K_LEFTARROW ) - { - if ( edit->cursor > 0 ) { - edit->cursor--; - } - if ( edit->cursor < edit->scroll ) - { - edit->scroll--; - } - return; - } + case K_HOME: + edit->cursor = 0; + break; - if ( key == K_HOME || ( tolower(key) == 'a' && keys[K_CTRL].down ) ) { - edit->cursor = 0; - return; - } + case K_END: + edit->cursor = len; + break; - if ( key == K_END || ( tolower(key) == 'e' && keys[K_CTRL].down ) ) { - edit->cursor = len; - return; + case K_INS: + key_overstrikeMode = !key_overstrikeMode; + break; + + default: + break; } - if ( key == K_INS ) { - key_overstrikeMode = !key_overstrikeMode; - return; + // Change scroll if cursor is no longer visible + if ( edit->cursor < edit->scroll ) { + edit->scroll = edit->cursor; + } else if ( edit->cursor >= edit->scroll + edit->widthInChars && edit->cursor <= len ) { + edit->scroll = edit->cursor - edit->widthInChars + 1; } } diff --git a/src/null/null_client.c b/src/null/null_client.c index 994f53f4..2b98195c 100644 --- a/src/null/null_client.c +++ b/src/null/null_client.c @@ -86,4 +86,4 @@ void CL_StartHunkUsers( void ) { } // bk001119 - added new dummy for sv_init.c -void CL_ShutdownAll(void) {}; +void CL_ShutdownAll(void) {} diff --git a/src/qcommon/cm_patch.c b/src/qcommon/cm_patch.c index f262db9c..38b7d5cc 100644 --- a/src/qcommon/cm_patch.c +++ b/src/qcommon/cm_patch.c @@ -1154,7 +1154,7 @@ struct patchCollide_s *CM_GeneratePatchCollide( int width, int height, vec3_t *p if ( width <= 2 || height <= 2 || !points ) { Com_Error( ERR_DROP, "CM_GeneratePatchFacets: bad parameters: (%i, %i, %p)", - width, height, points ); + width, height, (void *)points ); } if ( !(width & 1) || !(height & 1) ) { @@ -1387,6 +1387,11 @@ void CM_TraceThroughPatchCollide( traceWork_t *tw, const struct patchCollide_s * static cvar_t *cv; #endif //BSPC + if ( 
!BoundsIntersect( tw->bounds[0], tw->bounds[1], + pc->bounds[0], pc->bounds[1] ) ) { + return; + } + if (tw->isPoint) { CM_TracePointThroughPatchCollide( tw, pc ); return; diff --git a/src/qcommon/cm_test.c b/src/qcommon/cm_test.c index 9e950603..485facc2 100644 --- a/src/qcommon/cm_test.c +++ b/src/qcommon/cm_test.c @@ -251,6 +251,10 @@ int CM_PointContents( const vec3_t p, clipHandle_t model ) { brushnum = cm.leafbrushes[leaf->firstLeafBrush+k]; b = &cm.brushes[brushnum]; + if ( !BoundsIntersectPoint( b->bounds[0], b->bounds[1], p ) ) { + continue; + } + // see if the point is in the brush for ( i = 0 ; i < b->numsides ; i++ ) { d = DotProduct( p, b->sides[i].plane->normal ); diff --git a/src/qcommon/cm_trace.c b/src/qcommon/cm_trace.c index ee9540e3..c40e1013 100644 --- a/src/qcommon/cm_trace.c +++ b/src/qcommon/cm_trace.c @@ -847,6 +847,11 @@ void CM_TraceThroughLeaf( traceWork_t *tw, cLeaf_t *leaf ) { b->collided = qfalse; + if ( !BoundsIntersect( tw->bounds[0], tw->bounds[1], + b->bounds[0], b->bounds[1] ) ) { + continue; + } + CM_TraceThroughBrush( tw, b ); if ( !tw->trace.fraction ) { tw->trace.lateralFraction = 0.0f; diff --git a/src/qcommon/common.c b/src/qcommon/common.c index cdfe3386..be4eafd1 100644 --- a/src/qcommon/common.c +++ b/src/qcommon/common.c @@ -1257,7 +1257,7 @@ void Com_Meminfo_f( void ) { for (block = mainzone->blocklist.next ; ; block = block->next) { if ( Cmd_Argc() != 1 ) { Com_Printf ("block:%p size:%7i tag:%3i\n", - block, block->size, block->tag); + (void *)block, block->size, block->tag); } if ( block->tag ) { zoneBytes += block->size; diff --git a/src/qcommon/files.c b/src/qcommon/files.c index cd369908..c1f6fb2d 100644 --- a/src/qcommon/files.c +++ b/src/qcommon/files.c @@ -973,22 +973,6 @@ qboolean FS_FilenameCompare( const char *s1, const char *s2 ) { return qfalse; // strings are equal } -/* -=========== -FS_ShiftedStrStr -=========== -*/ -char *FS_ShiftedStrStr(const char *string, const char *substring, int shift) { - char 
buf[MAX_STRING_TOKENS]; - int i; - - for (i = 0; substring[i]; i++) { - buf[i] = substring[i] + shift; - } - buf[i] = '\0'; - return strstr(string, buf); -} - /* =========== FS_FOpenFileRead @@ -1117,19 +1101,13 @@ int FS_FOpenFileRead( const char *filename, fileHandle_t *file, qboolean uniqueF } } - // game.qvm - 13 - // ZT`X!di` - if (!(pak->referenced & FS_QAGAME_REF) && FS_ShiftedStrStr(filename, "ZT`X!di`", 13)) { + if (!(pak->referenced & FS_QAGAME_REF) && strstr(filename, "game.qvm")) { pak->referenced |= FS_QAGAME_REF; } - // cgame.qvm - 7 - // \`Zf^'jof - if (!(pak->referenced & FS_CGAME_REF) && FS_ShiftedStrStr(filename , "\\`Zf^'jof", 7)) { + if (!(pak->referenced & FS_CGAME_REF) && strstr(filename, "cgame.qvm")) { pak->referenced |= FS_CGAME_REF; } - // ui.qvm - 5 - // pd)lqh - if (!(pak->referenced & FS_UI_REF) && FS_ShiftedStrStr(filename , "pd)lqh", 5)) { + if (!(pak->referenced & FS_UI_REF) && strstr(filename, "ui.qvm")) { pak->referenced |= FS_UI_REF; } diff --git a/src/qcommon/msg.c b/src/qcommon/msg.c index 46019809..d46c2a3b 100644 --- a/src/qcommon/msg.c +++ b/src/qcommon/msg.c @@ -791,7 +791,7 @@ typedef struct { } netField_t; // using the stringizing operator to save typing... -#define NETF(x) #x,(int)&((entityState_t*)0)->x +#define NETF(x) #x,(size_t)&((entityState_t*)0)->x netField_t entityStateFields[] = { @@ -1106,7 +1106,7 @@ plyer_state_t communication */ // using the stringizing operator to save typing... -#define PSF(x) #x,(int)&((playerState_t*)0)->x +#define PSF(x) #x,(size_t)&((playerState_t*)0)->x netField_t playerStateFields[] = { diff --git a/src/qcommon/puff.c b/src/qcommon/puff.c new file mode 100644 index 00000000..721854d8 --- /dev/null +++ b/src/qcommon/puff.c @@ -0,0 +1,758 @@ +/* + * This is a modified version of Mark Adlers work, + * see below for the original copyright. 
+ * 2006 - Joerg Dietrich + */ + +/* + * puff.c + * Copyright (C) 2002-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in puff.h + * version 1.8, 9 Jan 2004 + * + * puff.c is a simple inflate written to be an unambiguous way to specify the + * deflate format. It is not written for speed but rather simplicity. As a + * side benefit, this code might actually be useful when small code is more + * important than speed, such as bootstrap applications. For typical deflate + * data, zlib's inflate() is about four times as fast as puff(). zlib's + * inflate compiles to around 20K on my machine, whereas puff.c compiles to + * around 4K on my machine (a PowerPC using GNU cc). If the faster decode() + * function here is used, then puff() is only twice as slow as zlib's + * inflate(). + * + * All dynamically allocated memory comes from the stack. The stack required + * is less than 2K bytes. This code is compatible with 16-bit int's and + * assumes that long's are at least 32 bits. puff.c uses the short data type, + * assumed to be 16 bits, for arrays in order to conserve memory. The code + * works whether integers are stored big endian or little endian. + * + * In the comments below are "Format notes" that describe the inflate process + * and document some of the less obvious aspects of the format. 
This source + * code is meant to supplement RFC 1951, which formally describes the deflate + * format: + * + * http://www.zlib.org/rfc-deflate.html + */ + +/* + * Change history: + * + * 1.0 10 Feb 2002 - First version + * 1.1 17 Feb 2002 - Clarifications of some comments and notes + * - Update puff() dest and source pointers on negative + * errors to facilitate debugging deflators + * - Remove longest from struct huffman -- not needed + * - Simplify offs[] index in construct() + * - Add input size and checking, using longjmp() to + * maintain easy readability + * - Use short data type for large arrays + * - Use pointers instead of long to specify source and + * destination sizes to avoid arbitrary 4 GB limits + * 1.2 17 Mar 2002 - Add faster version of decode(), doubles speed (!), + * but leave simple version for readability + * - Make sure invalid distances detected if pointers + * are 16 bits + * - Fix fixed codes table error + * - Provide a scanning mode for determining size of + * uncompressed data + * 1.3 20 Mar 2002 - Go back to lengths for puff() parameters [Jean-loup] + * - Add a puff.h file for the interface + * - Add braces in puff() for else do [Jean-loup] + * - Use indexes instead of pointers for readability + * 1.4 31 Mar 2002 - Simplify construct() code set check + * - Fix some comments + * - Add FIXLCODES #define + * 1.5 6 Apr 2002 - Minor comment fixes + * 1.6 7 Aug 2002 - Minor format changes + * 1.7 3 Mar 2003 - Added test code for distribution + * - Added zlib-like license + * 1.8 9 Jan 2004 - Added some comments on no distance codes case + */ + +#include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */ +#include "puff.h" /* prototype for puff() */ + +#define local static /* for local function definitions */ + +/* + * Maximums for allocations and loops. It is not useful to change these -- + * they are fixed by the deflate format. 
+ */ +#define MAXBITS 15 /* maximum bits in a code */ +#define MAXLCODES 286 /* maximum number of literal/length codes */ +#define MAXDCODES 30 /* maximum number of distance codes */ +#define MAXCODES (MAXLCODES+MAXDCODES) /* maximum codes lengths to read */ +#define FIXLCODES 288 /* number of fixed literal/length codes */ + +/* input and output state */ +struct state { + /* output state */ + uint8_t *out; /* output buffer */ + uint32_t outlen; /* available space at out */ + uint32_t outcnt; /* bytes written to out so far */ + + /* input state */ + uint8_t *in; /* input buffer */ + uint32_t inlen; /* available input at in */ + uint32_t incnt; /* bytes read so far */ + int32_t bitbuf; /* bit buffer */ + int32_t bitcnt; /* number of bits in bit buffer */ + + /* input limit error return state for bits() and decode() */ + jmp_buf env; +}; + +/* + * Return need bits from the input stream. This always leaves less than + * eight bits in the buffer. bits() works properly for need == 0. + * + * Format notes: + * + * - Bits are stored in bytes from the least significant bit to the most + * significant bit. Therefore bits are dropped from the bottom of the bit + * buffer, using shift right, and new bytes are appended to the top of the + * bit buffer, using shift left. + */ +local int32_t bits(struct state *s, int32_t need) +{ + int32_t val; /* bit accumulator (can use up to 20 bits) */ + + /* load at least need bits into val */ + val = s->bitbuf; + while (s->bitcnt < need) { + if (s->incnt == s->inlen) longjmp(s->env, 1); /* out of input */ + val |= (int32_t)(s->in[s->incnt++]) << s->bitcnt; /* load eight bits */ + s->bitcnt += 8; + } + + /* drop need bits and update buffer, always zero to seven bits left */ + s->bitbuf = (int32_t)(val >> need); + s->bitcnt -= need; + + /* return need bits, zeroing the bits above that */ + return (int32_t)(val & ((1L << need) - 1)); +} + +/* + * Process a stored block. 
+ * + * Format notes: + * + * - After the two-bit stored block type (00), the stored block length and + * stored bytes are byte-aligned for fast copying. Therefore any leftover + * bits in the byte that has the last bit of the type, as many as seven, are + * discarded. The value of the discarded bits are not defined and should not + * be checked against any expectation. + * + * - The second inverted copy of the stored block length does not have to be + * checked, but it's probably a good idea to do so anyway. + * + * - A stored block can have zero length. This is sometimes used to byte-align + * subsets of the compressed data for random access or partial recovery. + */ +local int32_t stored(struct state *s) +{ + uint32_t len; /* length of stored block */ + + /* discard leftover bits from current byte (assumes s->bitcnt < 8) */ + s->bitbuf = 0; + s->bitcnt = 0; + + /* get length and check against its one's complement */ + if (s->incnt + 4 > s->inlen) return 2; /* not enough input */ + len = s->in[s->incnt++]; + len |= s->in[s->incnt++] << 8; + if (s->in[s->incnt++] != (~len & 0xff) || + s->in[s->incnt++] != ((~len >> 8) & 0xff)) + return -2; /* didn't match complement! */ + + /* copy len bytes from in to out */ + if (s->incnt + len > s->inlen) return 2; /* not enough input */ + if (s->out != NULL) { + if (s->outcnt + len > s->outlen) + return 1; /* not enough output space */ + while (len--) + s->out[s->outcnt++] = s->in[s->incnt++]; + } + else { /* just scanning */ + s->outcnt += len; + s->incnt += len; + } + + /* done with a valid stored block */ + return 0; +} + +/* + * Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of + * each length, which for a canonical code are stepped through in order. + * symbol[] are the symbol values in canonical order, where the number of + * entries is the sum of the counts in count[]. The decoding process can be + * seen in the function decode() below. 
+ */ +struct huffman { + int16_t *count; /* number of symbols of each length */ + int16_t *symbol; /* canonically ordered symbols */ +}; + +/* + * Decode a code from the stream s using huffman table h. Return the symbol or + * a negative value if there is an error. If all of the lengths are zero, i.e. + * an empty code, or if the code is incomplete and an invalid code is received, + * then -9 is returned after reading MAXBITS bits. + * + * Format notes: + * + * - The codes as stored in the compressed data are bit-reversed relative to + * a simple integer ordering of codes of the same lengths. Hence below the + * bits are pulled from the compressed data one at a time and used to + * build the code value reversed from what is in the stream in order to + * permit simple integer comparisons for decoding. A table-based decoding + * scheme (as used in zlib) does not need to do this reversal. + * + * - The first code for the shortest length is all zeros. Subsequent codes of + * the same length are simply integer increments of the previous code. When + * moving up a length, a zero bit is appended to the code. For a complete + * code, the last code of the longest length will be all ones. + * + * - Incomplete codes are handled by this decoder, since they are permitted + * in the deflate format. See the format notes for fixed() and dynamic(). 
+ */ +local int32_t decode(struct state *s, struct huffman *h) +{ + int32_t len; /* current number of bits in code */ + int32_t code; /* len bits being decoded */ + int32_t first; /* first code of length len */ + int32_t count; /* number of codes of length len */ + int32_t index; /* index of first code of length len in symbol table */ + int32_t bitbuf; /* bits from stream */ + int32_t left; /* bits left in next or left to process */ + int16_t *next; /* next number of codes */ + + bitbuf = s->bitbuf; + left = s->bitcnt; + code = first = index = 0; + len = 1; + next = h->count + 1; + while (1) { + while (left--) { + code |= bitbuf & 1; + bitbuf >>= 1; + count = *next++; + if (code < first + count) { /* if length len, return symbol */ + s->bitbuf = bitbuf; + s->bitcnt = (s->bitcnt - len) & 7; + return h->symbol[index + (code - first)]; + } + index += count; /* else update for next length */ + first += count; + first <<= 1; + code <<= 1; + len++; + } + left = (MAXBITS+1) - len; + if (left == 0) break; + if (s->incnt == s->inlen) longjmp(s->env, 1); /* out of input */ + bitbuf = s->in[s->incnt++]; + if (left > 8) left = 8; + } + return -9; /* ran out of codes */ +} + +/* + * Given the list of code lengths length[0..n-1] representing a canonical + * Huffman code for n symbols, construct the tables required to decode those + * codes. Those tables are the number of codes of each length, and the symbols + * sorted by length, retaining their original order within each length. The + * return value is zero for a complete code set, negative for an over- + * subscribed code set, and positive for an incomplete code set. The tables + * can be used if the return value is zero or positive, but they cannot be used + * if the return value is negative. If the return value is zero, it is not + * possible for decode() using that table to return an error--any stream of + * enough bits will resolve to a symbol. 
If the return value is positive, then + * it is possible for decode() using that table to return an error for received + * codes past the end of the incomplete lengths. + * + * Not used by decode(), but used for error checking, h->count[0] is the number + * of the n symbols not in the code. So n - h->count[0] is the number of + * codes. This is useful for checking for incomplete codes that have more than + * one symbol, which is an error in a dynamic block. + * + * Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS + * This is assured by the construction of the length arrays in dynamic() and + * fixed() and is not verified by construct(). + * + * Format notes: + * + * - Permitted and expected examples of incomplete codes are one of the fixed + * codes and any code with a single symbol which in deflate is coded as one + * bit instead of zero bits. See the format notes for fixed() and dynamic(). + * + * - Within a given code length, the symbols are kept in ascending order for + * the code bits definition. + */ +local int32_t construct(struct huffman *h, int16_t *length, int32_t n) +{ + int32_t symbol; /* current symbol when stepping through length[] */ + int32_t len; /* current length when stepping through h->count[] */ + int32_t left; /* number of possible codes left of current length */ + int16_t offs[MAXBITS+1]; /* offsets in symbol table for each length */ + + /* count number of codes of each length */ + for (len = 0; len <= MAXBITS; len++) + h->count[len] = 0; + for (symbol = 0; symbol < n; symbol++) + (h->count[length[symbol]])++; /* assumes lengths are within bounds */ + if (h->count[0] == n) /* no codes! 
*/ + return 0; /* complete, but decode() will fail */ + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; /* one possible code of zero length */ + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; /* one more bit, double codes left */ + left -= h->count[len]; /* deduct count from possible codes */ + if (left < 0) return left; /* over-subscribed--return negative */ + } /* left > 0 means incomplete */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + h->count[len]; + + /* + * put symbols in table sorted by length, by symbol order within each + * length + */ + for (symbol = 0; symbol < n; symbol++) + if (length[symbol] != 0) + h->symbol[offs[length[symbol]]++] = symbol; + + /* return zero for complete set, positive for incomplete set */ + return left; +} + +/* + * Decode literal/length and distance codes until an end-of-block code. + * + * Format notes: + * + * - Compressed data that is after the block type if fixed or after the code + * description if dynamic is a combination of literals and length/distance + * pairs terminated by an end-of-block code. Literals are simply Huffman + * coded bytes. A length/distance pair is a coded length followed by a + * coded distance to represent a string that occurs earlier in the + * uncompressed data that occurs again at the current location. + * + * - Literals, lengths, and the end-of-block code are combined into a single + * code of up to 286 symbols. They are 256 literals (0..255), 29 length + * symbols (257..285), and the end-of-block symbol (256). + * + * - There are 256 possible lengths (3..258), and so 29 symbols are not enough + * to represent all of those. Lengths 3..10 and 258 are in fact represented + * by just a length symbol. Lengths 11..257 are represented as a symbol and + * some number of extra bits that are added as an integer to the base length + * of the length symbol. 
The number of extra bits is determined by the base + * length symbol. These are in the static arrays below, lens[] for the base + * lengths and lext[] for the corresponding number of extra bits. + * + * - The reason that 258 gets its own symbol is that the longest length is used + * often in highly redundant files. Note that 258 can also be coded as the + * base value 227 plus the maximum extra value of 31. While a good deflate + * should never do this, it is not an error, and should be decoded properly. + * + * - If a length is decoded, including its extra bits if any, then it is + * followed by a distance code. There are up to 30 distance symbols. Again + * there are many more possible distances (1..32768), so extra bits are added + * to a base value represented by the symbol. The distances 1..4 get their + * own symbol, but the rest require extra bits. The base distances and + * corresponding number of extra bits are below in the static arrays dist[] + * and dext[]. + * + * - Literal bytes are simply written to the output. A length/distance pair is + * an instruction to copy previously uncompressed bytes to the output. The + * copy is from distance bytes back in the output stream, copying for length + * bytes. + * + * - Distances pointing before the beginning of the output data are not + * permitted. + * + * - Overlapped copies, where the length is greater than the distance, are + * allowed and common. For example, a distance of one and a length of 258 + * simply copies the last byte 258 times. A distance of four and a length of + * twelve copies the last four bytes three times. A simple forward copy + * ignoring whether the length is greater than the distance or not implements + * this correctly. You should not use memcpy() since its behavior is not + * defined for overlapped arrays. You should not use memmove() or bcopy() + * since though their behavior -is- defined for overlapping arrays, it is + * defined to do the wrong thing in this case. 
+ */ +local int32_t codes(struct state *s, + struct huffman *lencode, + struct huffman *distcode) +{ + int32_t symbol; /* decoded symbol */ + int32_t len; /* length for copy */ + uint32_t dist; /* distance for copy */ + static const int16_t lens[29] = { /* Size base for length codes 257..285 */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258}; + static const int16_t lext[29] = { /* Extra bits for length codes 257..285 */ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0}; + static const int16_t dists[30] = { /* Offset base for distance codes 0..29 */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577}; + static const int16_t dext[30] = { /* Extra bits for distance codes 0..29 */ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13}; + + /* decode literals and length/distance pairs */ + do { + symbol = decode(s, lencode); + if (symbol < 0) return symbol; /* invalid symbol */ + if (symbol < 256) { /* literal: symbol is the byte */ + /* write out the literal */ + if (s->out != NULL) { + if (s->outcnt == s->outlen) return 1; + s->out[s->outcnt] = symbol; + } + s->outcnt++; + } + else if (symbol > 256) { /* length */ + /* get and compute length */ + symbol -= 257; + if (symbol >= 29) return -9; /* invalid fixed code */ + len = lens[symbol] + bits(s, lext[symbol]); + + /* get and check distance */ + symbol = decode(s, distcode); + if (symbol < 0) return symbol; /* invalid symbol */ + dist = dists[symbol] + bits(s, dext[symbol]); + if (dist > s->outcnt) + return -10; /* distance too far back */ + + /* copy length bytes from distance bytes back */ + if (s->out != NULL) { + if (s->outcnt + len > s->outlen) return 1; + while (len--) { + s->out[s->outcnt] = s->out[s->outcnt - dist]; + s->outcnt++; + } + } + else + s->outcnt 
+= len; + } + } while (symbol != 256); /* end of block symbol */ + + /* done with a valid fixed or dynamic block */ + return 0; +} + +/* + * Process a fixed codes block. + * + * Format notes: + * + * - This block type can be useful for compressing small amounts of data for + * which the size of the code descriptions in a dynamic block exceeds the + * benefit of custom codes for that block. For fixed codes, no bits are + * spent on code descriptions. Instead the code lengths for literal/length + * codes and distance codes are fixed. The specific lengths for each symbol + * can be seen in the "for" loops below. + * + * - The literal/length code is complete, but has two symbols that are invalid + * and should result in an error if received. This cannot be implemented + * simply as an incomplete code since those two symbols are in the "middle" + * of the code. They are eight bits long and the longest literal/length + * code is nine bits. Therefore the code must be constructed with those + * symbols, and the invalid symbols must be detected after decoding. + * + * - The fixed distance codes also have two invalid symbols that should result + * in an error if received. Since all of the distance codes are the same + * length, this can be implemented as an incomplete code. Then the invalid + * codes are detected while decoding. 
+ */ +local int32_t fixed(struct state *s) +{ + static int32_t virgin = 1; + static int16_t lencnt[MAXBITS+1], lensym[FIXLCODES]; + static int16_t distcnt[MAXBITS+1], distsym[MAXDCODES]; + static struct huffman lencode = {lencnt, lensym}; + static struct huffman distcode = {distcnt, distsym}; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + int32_t symbol; + int16_t lengths[FIXLCODES]; + + /* literal/length table */ + for (symbol = 0; symbol < 144; symbol++) + lengths[symbol] = 8; + for (; symbol < 256; symbol++) + lengths[symbol] = 9; + for (; symbol < 280; symbol++) + lengths[symbol] = 7; + for (; symbol < FIXLCODES; symbol++) + lengths[symbol] = 8; + construct(&lencode, lengths, FIXLCODES); + + /* distance table */ + for (symbol = 0; symbol < MAXDCODES; symbol++) + lengths[symbol] = 5; + construct(&distcode, lengths, MAXDCODES); + + /* do this just once */ + virgin = 0; + } + + /* decode data until end-of-block code */ + return codes(s, &lencode, &distcode); +} + +/* + * Process a dynamic codes block. + * + * Format notes: + * + * - A dynamic block starts with a description of the literal/length and + * distance codes for that block. New dynamic blocks allow the compressor to + * rapidly adapt to changing data with new codes optimized for that data. + * + * - The codes used by the deflate format are "canonical", which means that + * the actual bits of the codes are generated in an unambiguous way simply + * from the number of bits in each code. Therefore the code descriptions + * are simply a list of code lengths for each symbol. + * + * - The code lengths are stored in order for the symbols, so lengths are + * provided for each of the literal/length symbols, and for each of the + * distance symbols. + * + * - If a symbol is not used in the block, this is represented by a zero as + * the code length. This does not mean a zero-length code, but rather + * that no code should be created for this symbol. 
There is no way in the + * deflate format to represent a zero-length code. + * + * - The maximum number of bits in a code is 15, so the possible lengths for + * any code are 1..15. + * + * - The fact that a length of zero is not permitted for a code has an + * interesting consequence. Normally if only one symbol is used for a given + * code, then in fact that code could be represented with zero bits. However + * in deflate, that code has to be at least one bit. So for example, if + * only a single distance base symbol appears in a block, then it will be + * represented by a single code of length one, in particular one 0 bit. This + * is an incomplete code, since if a 1 bit is received, it has no meaning, + * and should result in an error. So incomplete distance codes of one symbol + * should be permitted, and the receipt of invalid codes should be handled. + * + * - It is also possible to have a single literal/length code, but that code + * must be the end-of-block code, since every dynamic block has one. This + * is not the most efficient way to create an empty block (an empty fixed + * block is fewer bits), but it is allowed by the format. So incomplete + * literal/length codes of one symbol should also be permitted. + * + * - If there are only literal codes and no lengths, then there are no distance + * codes. This is represented by one distance code with zero bits. + * + * - The list of up to 286 length/literal lengths and up to 30 distance lengths + * are themselves compressed using Huffman codes and run-length encoding. In + * the list of code lengths, a 0 symbol means no code, a 1..15 symbol means + * that length, and the symbols 16, 17, and 18 are run-length instructions. + * Each of 16, 17, and 18 are followed by extra bits to define the length of + * the run. 16 copies the last length 3 to 6 times. 17 represents 3 to 10 + * zero lengths, and 18 represents 11 to 138 zero lengths. Unused symbols + * are common, hence the special coding for zero lengths. 
+ * + * - The symbols for 0..18 are Huffman coded, and so that code must be + * described first. This is simply a sequence of up to 19 three-bit values + * representing no code (0) or the code length for that symbol (1..7). + * + * - A dynamic block starts with three fixed-size counts from which is computed + * the number of literal/length code lengths, the number of distance code + * lengths, and the number of code length code lengths (ok, you come up with + * a better name!) in the code descriptions. For the literal/length and + * distance codes, lengths after those provided are considered zero, i.e. no + * code. The code length code lengths are received in a permuted order (see + * the order[] array below) to make a short code length code length list more + * likely. As it turns out, very short and very long codes are less likely + * to be seen in a dynamic code description, hence what may appear initially + * to be a peculiar ordering. + * + * - Given the number of literal/length code lengths (nlen) and distance code + * lengths (ndist), then they are treated as one long list of nlen + ndist + * code lengths. Therefore run-length coding can and often does cross the + * boundary between the two sets of lengths. + * + * - So to summarize, the code description at the start of a dynamic block is + * three counts for the number of code lengths for the literal/length codes, + * the distance codes, and the code length codes. This is followed by the + * code length code lengths, three bits each. This is used to construct the + * code length code which is used to read the remainder of the lengths. Then + * the literal/length code lengths and distance lengths are read as a single + * set of lengths using the code length codes. Codes are constructed from + * the resulting two sets of lengths, and then finally you can start + * decoding actual compressed data in the block. 
+ * + * - For reference, a "typical" size for the code description in a dynamic + * block is around 80 bytes. + */ +local int32_t dynamic(struct state *s) +{ + int32_t nlen, ndist, ncode; /* number of lengths in descriptor */ + int32_t index; /* index of lengths[] */ + int32_t err; /* construct() return value */ + int16_t lengths[MAXCODES]; /* descriptor code lengths */ + int16_t lencnt[MAXBITS+1], lensym[MAXLCODES]; /* lencode memory */ + int16_t distcnt[MAXBITS+1], distsym[MAXDCODES]; /* distcode memory */ + struct huffman lencode = {lencnt, lensym}; /* length code */ + struct huffman distcode = {distcnt, distsym}; /* distance code */ + static const int16_t order[19] = /* permutation of code length codes */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /* get number of lengths in each table, check lengths */ + nlen = bits(s, 5) + 257; + ndist = bits(s, 5) + 1; + ncode = bits(s, 4) + 4; + if (nlen > MAXLCODES || ndist > MAXDCODES) + return -3; /* bad counts */ + + /* read code length code lengths (really), missing lengths are zero */ + for (index = 0; index < ncode; index++) + lengths[order[index]] = bits(s, 3); + for (; index < 19; index++) + lengths[order[index]] = 0; + + /* build huffman table for code lengths codes (use lencode temporarily) */ + err = construct(&lencode, lengths, 19); + if (err != 0) return -4; /* require complete code set here */ + + /* read length/literal and distance code length tables */ + index = 0; + while (index < nlen + ndist) { + int32_t symbol; /* decoded value */ + int32_t len; /* last length to repeat */ + + symbol = decode(s, &lencode); + if (symbol < 16) /* length in 0..15 */ + lengths[index++] = symbol; + else { /* repeat instruction */ + len = 0; /* assume repeating zeros */ + if (symbol == 16) { /* repeat last length 3..6 times */ + if (index == 0) return -5; /* no last length! 
*/ + len = lengths[index - 1]; /* last length */ + symbol = 3 + bits(s, 2); + } + else if (symbol == 17) /* repeat zero 3..10 times */ + symbol = 3 + bits(s, 3); + else /* == 18, repeat zero 11..138 times */ + symbol = 11 + bits(s, 7); + if (index + symbol > nlen + ndist) + return -6; /* too many lengths! */ + while (symbol--) /* repeat last or zero symbol times */ + lengths[index++] = len; + } + } + + /* build huffman table for literal/length codes */ + err = construct(&lencode, lengths, nlen); + if (err < 0 || (err > 0 && nlen - lencode.count[0] != 1)) + return -7; /* only allow incomplete codes if just one code */ + + /* build huffman table for distance codes */ + err = construct(&distcode, lengths + nlen, ndist); + if (err < 0 || (err > 0 && ndist - distcode.count[0] != 1)) + return -8; /* only allow incomplete codes if just one code */ + + /* decode data until end-of-block code */ + return codes(s, &lencode, &distcode); +} + +/* + * Inflate source to dest. On return, destlen and sourcelen are updated to the + * size of the uncompressed data and the size of the deflate data respectively. + * On success, the return value of puff() is zero. If there is an error in the + * source data, i.e. it is not in the deflate format, then a negative value is + * returned. If there is not enough input available or there is not enough + * output space, then a positive error is returned. In that case, destlen and + * sourcelen are not updated to facilitate retrying from the beginning with the + * provision of more input data or more output space. In the case of invalid + * inflate data (a negative error), the dest and source pointers are updated to + * facilitate the debugging of deflators. + * + * puff() also has a mode to determine the size of the uncompressed output with + * no output written. For this dest must be (uint8_t *)0. In this case, + * the input value of *destlen is ignored, and on return *destlen is set to the + * size of the uncompressed output. 
+ * + * The return codes are: + * + * 2: available inflate data did not terminate + * 1: output space exhausted before completing inflate + * 0: successful inflate + * -1: invalid block type (type == 3) + * -2: stored block length did not match one's complement + * -3: dynamic block code description: too many length or distance codes + * -4: dynamic block code description: code lengths codes incomplete + * -5: dynamic block code description: repeat lengths with no first length + * -6: dynamic block code description: repeat more than specified lengths + * -7: dynamic block code description: invalid literal/length code lengths + * -8: dynamic block code description: invalid distance code lengths + * -9: invalid literal/length or distance code in fixed or dynamic block + * -10: distance is too far back in fixed or dynamic block + * + * Format notes: + * + * - Three bits are read for each block to determine the kind of block and + * whether or not it is the last block. Then the block is decoded and the + * process repeated if it was not the last block. + * + * - The leftover bits in the last byte of the deflate data after the last + * block (if it was a fixed or dynamic block) are undefined and have no + * expected values to check. 
+ */ +int32_t puff(uint8_t *dest, /* pointer to destination pointer */ + uint32_t *destlen, /* amount of output space */ + uint8_t *source, /* pointer to source data pointer */ + uint32_t *sourcelen) /* amount of input available */ +{ + struct state s; /* input/output state */ + int32_t last, type; /* block information */ + int32_t err; /* return value */ + + /* initialize output state */ + s.out = dest; + s.outlen = *destlen; /* ignored if dest is NULL */ + s.outcnt = 0; + + /* initialize input state */ + s.in = source; + s.inlen = *sourcelen; + s.incnt = 0; + s.bitbuf = 0; + s.bitcnt = 0; + + /* return if bits() or decode() tries to read past available input */ + if (setjmp(s.env) != 0) /* if came back here via longjmp() */ + err = 2; /* then skip do-loop, return error */ + else { + /* process blocks until last block or error */ + do { + last = bits(&s, 1); /* one if last block */ + type = bits(&s, 2); /* block type 0..3 */ + err = type == 0 ? stored(&s) : + (type == 1 ? fixed(&s) : + (type == 2 ? dynamic(&s) : + -1)); /* type == 3, invalid */ + if (err != 0) break; /* return with error */ + } while (!last); + } + + /* update the lengths and return */ + if (err <= 0) { + *destlen = s.outcnt; + *sourcelen = s.incnt; + } + return err; +} diff --git a/src/qcommon/puff.h b/src/qcommon/puff.h new file mode 100644 index 00000000..14070f64 --- /dev/null +++ b/src/qcommon/puff.h @@ -0,0 +1,43 @@ +/* + * This is a modified version of Mark Adlers work, + * see below for the original copyright. + * 2006 - Joerg Dietrich + */ + +/* puff.h + Copyright (C) 2002, 2003 Mark Adler, all rights reserved + version 1.7, 3 Mar 2002 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. 
+ + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +#ifndef __PUFF_H +#define __PUFF_H + +#include "q_shared.h" /* for definitions of the types */ + +/* + * See puff.c for purpose and usage. + */ +int32_t puff(uint8_t *dest, /* pointer to destination pointer */ + uint32_t *destlen, /* amount of output space */ + uint8_t *source, /* pointer to source data pointer */ + uint32_t *sourcelen); /* amount of input available */ + +#endif // __PUFF_H diff --git a/src/qcommon/q_math.c b/src/qcommon/q_math.c index 196d2f55..0973f31c 100644 --- a/src/qcommon/q_math.c +++ b/src/qcommon/q_math.c @@ -1086,6 +1086,53 @@ void AddPointToBounds( const vec3_t v, vec3_t mins, vec3_t maxs ) { } } +qboolean BoundsIntersect(const vec3_t mins, const vec3_t maxs, + const vec3_t mins2, const vec3_t maxs2) +{ + if ( maxs[0] < mins2[0] || + maxs[1] < mins2[1] || + maxs[2] < mins2[2] || + mins[0] > maxs2[0] || + mins[1] > maxs2[1] || + mins[2] > maxs2[2]) + { + return qfalse; + } + + return qtrue; +} + +qboolean BoundsIntersectSphere(const vec3_t mins, const vec3_t maxs, + const vec3_t origin, vec_t radius) +{ + if ( origin[0] - radius > maxs[0] || + origin[0] + radius < mins[0] || + origin[1] - radius > maxs[1] || + origin[1] + radius < mins[1] || + origin[2] - radius > maxs[2] || + origin[2] + radius < mins[2]) + { + return qfalse; + } + + return qtrue; 
+} + +qboolean BoundsIntersectPoint(const vec3_t mins, const vec3_t maxs, + const vec3_t origin) +{ + if ( origin[0] > maxs[0] || + origin[0] < mins[0] || + origin[1] > maxs[1] || + origin[1] < mins[1] || + origin[2] > maxs[2] || + origin[2] < mins[2]) + { + return qfalse; + } + + return qtrue; +} vec_t VectorNormalize( vec3_t v ) { // NOTE: TTimo - Apple G4 altivec source uses double? diff --git a/src/qcommon/q_shared.h b/src/qcommon/q_shared.h index 50a0b10c..2e3a153c 100644 --- a/src/qcommon/q_shared.h +++ b/src/qcommon/q_shared.h @@ -559,7 +559,7 @@ vec_t VectorLengthSquared( const vec3_t v ); vec_t Distance( const vec3_t p1, const vec3_t p2 ); vec_t DistanceSquared( const vec3_t p1, const vec3_t p2 ); - + void VectorNormalizeFast( vec3_t v ); void VectorInverse( vec3_t v ); @@ -593,6 +593,13 @@ void AxisCopy( vec3_t in[3], vec3_t out[3] ); void SetPlaneSignbits( struct cplane_s *out ); int BoxOnPlaneSide (vec3_t emins, vec3_t emaxs, struct cplane_s *plane); +qboolean BoundsIntersect(const vec3_t mins, const vec3_t maxs, + const vec3_t mins2, const vec3_t maxs2); +qboolean BoundsIntersectSphere(const vec3_t mins, const vec3_t maxs, + const vec3_t origin, vec_t radius); +qboolean BoundsIntersectPoint(const vec3_t mins, const vec3_t maxs, + const vec3_t origin); + float AngleMod(float a); float LerpAngle (float from, float to, float frac); float AngleSubtract( float a1, float a2 ); diff --git a/src/qcommon/vm_x86.c b/src/qcommon/vm_x86.c index c0a703bc..d298b755 100644 --- a/src/qcommon/vm_x86.c +++ b/src/qcommon/vm_x86.c @@ -213,7 +213,7 @@ void callAsmCall(void) // arbitrarily named (though this is not true for the MSC version). When a vm // makes a system call, control jumps straight to the doAsmCall label. 
void AsmCall( void ) { - asm( CMANG(doAsmCall) ": \n\t" \ + __asm__( CMANG(doAsmCall) ": \n\t" \ " movl (%%edi),%%eax \n\t" \ " subl $4,%%edi \n\t" \ " orl %%eax,%%eax \n\t" \ diff --git a/src/qcommon/vm_x86_64.c b/src/qcommon/vm_x86_64.c index e8e827e5..814acfef 100644 --- a/src/qcommon/vm_x86_64.c +++ b/src/qcommon/vm_x86_64.c @@ -29,9 +29,15 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include #include +#include +#include #include #include #include +#include + +//#define USE_GAS +//#define DEBUG_VM #ifdef DEBUG_VM #define Dfprintf(fd, args...) fprintf(fd, ##args) @@ -40,6 +46,19 @@ static FILE* qdasmout; #define Dfprintf(args...) #endif +#define VM_X86_64_MMAP + +#ifndef USE_GAS +void assembler_set_output(char* buf); +size_t assembler_get_code_size(void); +void assembler_init(int pass); +void assemble_line(const char* input, size_t len); +#ifdef Dfprintf +#undef Dfprintf +#define Dfprintf(args...) +#endif +#endif // USE_GAS + static void VM_Destroy_Compiled(vm_t* self); /* @@ -207,8 +226,29 @@ static unsigned char op_argsize[256] = [OP_BLOCK_COPY] = 4, }; +#ifdef USE_GAS #define emit(x...) \ do { fprintf(fh_s, ##x); fputc('\n', fh_s); } while(0) +#else +void emit(const char* fmt, ...) 
+{ + va_list ap; + char line[4096]; + va_start(ap, fmt); + vsnprintf(line, sizeof(line), fmt, ap); + va_end(ap); + assemble_line(line, strlen(line)); +} +#endif // USE_GAS + +#ifdef USE_GAS +#define JMPIARG \ + emit("jmp i_%08x", iarg); +#else +#define JMPIARG \ + emit("movq $%lu, %%rax", (unsigned long)(vm->codeBase+vm->instructionPointers[iarg])); /* %lu needs an integer, not a pointer */ \ + emit("jmpq *%%rax"); /* emit() is printf-style: a literal '%' must be written as '%%' */ +#endif // integer compare and jump #define IJ(op) \ @@ -216,7 +256,8 @@ static unsigned char op_argsize[256] = emit("movl 4(%%rsi), %%eax"); \ emit("cmpl 8(%%rsi), %%eax"); \ emit(op " i_%08x", instruction+1); \ - emit("jmp i_%08x", iarg); + JMPIARG \ + neednilabel = 1; #ifdef USE_X87 #define FJ(bits, op) \ @@ -226,7 +267,8 @@ static unsigned char op_argsize[256] = emit("fnstsw %%ax");\ emit("testb $" #bits ", %%ah");\ emit(op " i_%08x", instruction+1);\ - emit("jmp i_%08x", iarg); + JMPIARG \ + neednilabel = 1; #define XJ(x) #else #define FJ(x, y) @@ -236,7 +278,8 @@ static unsigned char op_argsize[256] = emit("ucomiss 8(%%rsi), %%xmm0");\ emit("jp i_%08x", instruction+1);\ emit(op " i_%08x", instruction+1);\ - emit("jmp i_%08x", iarg); + JMPIARG \ + neednilabel = 1; #endif #define SIMPLE(op) \ @@ -293,9 +336,14 @@ static unsigned char op_argsize[256] = static void* getentrypoint(vm_t* vm) { +#ifdef USE_GAS return vm->codeBase+64; // skip ELF header +#else + return vm->codeBase; +#endif // USE_GAS } +#ifdef USE_GAS char* mmapfile(const char* fn, size_t* size) { int fd = -1; @@ -383,6 +431,7 @@ static int doas(char* in, char* out, unsigned char** compiledcode) return size; } +#endif // USE_GAS static void block_copy_vm(unsigned dest, unsigned src, unsigned count) { @@ -411,8 +460,13 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { char* code; unsigned iarg = 0; unsigned char barg = 0; - void* entryPoint; + int neednilabel = 0; + struct timeval tvstart = {0, 0}; +#ifdef USE_GAS + byte* compiledcode; + int compiledsize; + void* entryPoint; char fn_s[2*MAX_QPATH]; // output file for assembler code char
fn_o[2*MAX_QPATH]; // file written by as #ifdef DEBUG_VM @@ -420,16 +474,16 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { #endif FILE* fh_s; int fd_s, fd_o; - byte* compiledcode; - int compiledsize; + + gettimeofday(&tvstart, NULL); Com_Printf("compiling %s\n", vm->name); #ifdef DEBUG_VM snprintf(fn_s, sizeof(fn_s), "%.63s.s", vm->name); snprintf(fn_o, sizeof(fn_o), "%.63s.o", vm->name); - fd_s = open(fn_s, O_CREAT|O_WRONLY, 0644); - fd_o = open(fn_o, O_CREAT|O_WRONLY, 0644); + fd_s = open(fn_s, O_CREAT|O_WRONLY|O_TRUNC, 0644); + fd_o = open(fn_o, O_CREAT|O_WRONLY|O_TRUNC, 0644); #else snprintf(fn_s, sizeof(fn_s), "/tmp/%.63s.s_XXXXXX", vm->name); snprintf(fn_o, sizeof(fn_o), "/tmp/%.63s.o_XXXXXX", vm->name); @@ -463,25 +517,50 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { return; } - // translate all instructions - pc = 0; - code = (char *)header + header->codeOffset; - emit("start:"); emit("or %%r8, %%r8"); // check whether to set up instruction pointers emit("jnz main"); emit("jmp setupinstructionpointers"); emit("main:"); +#else // USE_GAS + int pass; + size_t compiledOfs = 0; + + gettimeofday(&tvstart, NULL); + + for (pass = 0; pass < 2; ++pass) { + + if(pass) + { + compiledOfs = assembler_get_code_size(); + vm->codeLength = compiledOfs; + vm->codeBase = mmap(NULL, compiledOfs, PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0); + if(vm->codeBase == (void*)-1) + Com_Error(ERR_DROP, "VM_CompileX86: can't mmap memory"); + + assembler_set_output((char*)vm->codeBase); + } + + assembler_init(pass); + +#endif // USE_GAS + + // translate all instructions + pc = 0; + code = (char *)header + header->codeOffset; for ( instruction = 0; instruction < header->instructionCount; ++instruction ) { op = code[ pc ]; ++pc; - vm->instructionPointers[instruction] = pc; +#ifndef USE_GAS + vm->instructionPointers[instruction] = assembler_get_code_size(); +#endif -#if 0 + /* store current instruction number in r15 for debugging */ +#if 1 emit("nop"); emit("movq $%d, %%r15", 
instruction); emit("nop"); @@ -502,7 +581,17 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { { Dfprintf(qdasmout, "%s\n", opnames[op]); } + +#ifdef USE_GAS emit("i_%08x:", instruction); +#else + if(neednilabel) + { + emit("i_%08x:", instruction); + neednilabel = 0; + } +#endif + switch ( op ) { case OP_UNDEF: @@ -561,6 +650,7 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { // emit("frstor 4(%%rsi)"); emit("addq $4, %%rsi"); emit("movl %%eax, (%%rsi)"); // store return value + neednilabel = 1; break; case OP_PUSH: emit("addq $4, %%rsi"); @@ -629,7 +719,8 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { emit("jp dojump_i_%08x", instruction); emit("jz i_%08x", instruction+1); emit("dojump_i_%08x:", instruction); - emit("jmp i_%08x", iarg); + JMPIARG + neednilabel = 1; #endif break; case OP_LTF: @@ -856,7 +947,7 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { } } - +#ifdef USE_GAS emit("setupinstructionpointers:"); emit("movq $%lu, %%rax", (unsigned long)vm->instructionPointers); for ( instruction = 0; instruction < header->instructionCount; ++instruction ) @@ -889,8 +980,17 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { vm->codeBase = compiledcode; // remember to skip ELF header! 
vm->codeLength = compiledsize; +#else // USE_GAS + } + assembler_init(0); + + if(mprotect(vm->codeBase, compiledOfs, PROT_READ|PROT_EXEC)) + Com_Error(ERR_DROP, "VM_CompileX86: mprotect failed"); +#endif // USE_GAS + vm->destroy = VM_Destroy_Compiled; +#ifdef USE_GAS entryPoint = getentrypoint(vm); // __asm__ __volatile__ ("int3"); @@ -911,8 +1011,6 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { fclose(qdasmout); #endif - Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n", vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength ); - out: close(fd_o); @@ -923,12 +1021,30 @@ out: unlink(fn_s); } #endif +#endif // USE_GAS + + if(vm->compiled) + { + struct timeval tvdone = {0, 0}; + struct timeval dur = {0, 0}; + Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n", vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength ); + + gettimeofday(&tvdone, NULL); + timersub(&tvdone, &tvstart, &dur); + Com_Printf( "compilation took %lu.%06lu seconds\n", dur.tv_sec, dur.tv_usec ); + } } void VM_Destroy_Compiled(vm_t* self) { +#ifdef USE_GAS munmap(self->codeBase, self->codeLength); +#elif _WIN32 + VirtualFree(self->codeBase, self->codeLength, MEM_RELEASE); +#else + munmap(self->codeBase, self->codeLength); +#endif } /* diff --git a/src/qcommon/vm_x86_64_assembler.c b/src/qcommon/vm_x86_64_assembler.c new file mode 100644 index 00000000..1eda764f --- /dev/null +++ b/src/qcommon/vm_x86_64_assembler.c @@ -0,0 +1,1419 @@ +/* +=========================================================================== +vm_x86_64_assembler.c -- assembler for x86-64 + +Copyright (C) 2007 Ludwig Nussel , Novell inc. + +Quake III Arena source code is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. 
+ +Quake III Arena source code is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Quake III Arena source code; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +=========================================================================== +*/ + +#include +#include +#include +#include + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long u64; + +static char* out; +static unsigned compiledOfs; +static unsigned assembler_pass; + +static const char* cur_line; + +static FILE* fout; + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +#define crap(fmt, args...) do { \ + _crap(__FUNCTION__, fmt, ##args); \ +} while(0) + +#define CRAP_INVALID_ARGS crap("invalid arguments %s, %s", argtype2str(arg1.type),argtype2str(arg2.type)); + +#ifdef DEBUG +#define debug(fmt, args...) printf(fmt, ##args) +#else +#define debug(fmt, args...) +#endif + +static void _crap(const char* func, const char* fmt, ...) 
+{ + va_list ap; + fprintf(stderr, "%s() - ", func); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + if(cur_line && cur_line[0]) + fprintf(stderr, "-> %s\n", cur_line); + exit(1); +} + +static void emit1(unsigned char v) +{ + if(assembler_pass) + { + out[compiledOfs++] = v; + if(fout) fwrite(&v, 1, 1, fout); + debug("%02hhx ", v); + } + else + { + ++compiledOfs; + } +} + +static inline void emit2(u16 v) +{ + emit1(v&0xFF); + emit1((v>>8)&0xFF); +} + +static inline void emit4(u32 v) +{ + emit1(v&0xFF); + emit1((v>>8)&0xFF); + emit1((v>>16)&0xFF); + emit1((v>>24)&0xFF); +} + +static inline void emit8(u64 v) +{ + emit4(v&0xFFFFFFFF); + emit4((v>>32)&0xFFFFFFFF); +} + +enum { + REX_W = 0x08, + REX_R = 0x04, + REX_X = 0x02, + REX_B = 0x01, +}; + +enum { + MODRM_MOD_00 = 0x00, + MODRM_MOD_01 = 0x01 << 6, + MODRM_MOD_10 = 0x02 << 6, + MODRM_MOD_11 = 0x03 << 6, + MODRM_RM_SIB = 0x04, +}; + +typedef enum +{ + T_NONE = 0x00, + T_REGISTER = 0x01, + T_IMMEDIATE = 0x02, + T_MEMORY = 0x04, + T_LABEL = 0x08, + T_ABSOLUTE = 0x80 +} argtype_t; + +typedef enum { + R_8 = 0x100, + R_16 = 0x200, + R_64 = 0x800, + R_MSZ = 0xF00, // size mask + R_XMM = 0x2000, // xmm register. 
year, sucks + R_EAX = 0x00, + R_EBX = 0x03, + R_ECX = 0x01, + R_EDX = 0x02, + R_ESI = 0x06, + R_EDI = 0x07, + R_ESP = 0x04, + R_RAX = R_EAX | R_64, + R_RBX = R_EBX | R_64, + R_RCX = R_ECX | R_64, + R_RDX = R_EDX | R_64, + R_RSI = R_ESI | R_64, + R_RDI = R_EDI | R_64, + R_RSP = R_ESP | R_64, + R_R8 = 0x08 | R_64, + R_R9 = 0x09 | R_64, + R_R10 = 0x0A | R_64, + R_R15 = 0x0F | R_64, + R_AL = R_EAX | R_8, + R_AX = R_EAX | R_16, + R_CL = R_ECX | R_8, + R_XMM0 = 0x00 | R_XMM, + R_MGP = 0x0F, // mask for general purpose registers +} reg_t; + +typedef enum { + MODRM_SIB = 0, + MODRM_NOSIB = 0x3, +} modrm_sib_t; + +typedef struct { + unsigned disp; + argtype_t basetype; + union { + u64 imm; + reg_t reg; + } base; + argtype_t indextype; + union { + u64 imm; + reg_t reg; + } index; + unsigned scale; +} memref_t; + +#define LABELLEN 32 + +typedef struct { + argtype_t type; + union { + u64 imm; + reg_t reg; + memref_t mem; + char label[LABELLEN]; + } v; + int absolute:1; +} arg_t; + +typedef void (*emitfunc)(const char* op, arg_t arg1, arg_t arg2, void* data); + +typedef struct { + char* mnemonic; + emitfunc func; + void* data; +} op_t; + +typedef struct { + u8 xmmprefix; + u8 subcode; // in modrm + u8 rmcode; // opcode for reg/mem, reg + u8 mrcode; // opcode for reg, reg/mem + u8 rcode8; // opcode for reg8/mem8 + u8 rcode; // opcode for reg/mem +} opparam_t; + +/* ************************* */ + +static unsigned hashkey(const char *string, unsigned len) { + unsigned register hash, i; + + hash = 0; + for (i = 0; i < len && string[i] != '\0'; ++i) { + hash += string[i] * (119 + i); + } + hash = (hash ^ (hash >> 10) ^ (hash >> 20)); + return hash; +} + +struct hashentry { + char* label; + unsigned address; + struct hashentry* next; +}; +static struct hashentry* labelhash[1021]; + +// no dup check! 
+static void hash_add_label(const char* label, unsigned address) +{ + struct hashentry* h; + unsigned i = hashkey(label, -1U); + i %= sizeof(labelhash)/sizeof(labelhash[0]); + h = malloc(sizeof(struct hashentry)); + h->label = strdup(label); + h->address = address; + h->next = labelhash[i]; + labelhash[i] = h; +} + +static unsigned lookup_label(const char* label) +{ + struct hashentry* h; + unsigned i = hashkey(label, -1U); + i %= sizeof(labelhash)/sizeof(labelhash[0]); + for(h = labelhash[i]; h; h = h->next ) + { + if(!strcmp(h->label, label)) + return h->address; + } + if(assembler_pass) + crap("label %s undefined", label); + return 0; +} + +static void labelhash_free(void) +{ + struct hashentry* h; + unsigned i; + unsigned z = 0, min = -1U, max = 0, t = 0; + for ( i = 0; i < sizeof(labelhash)/sizeof(labelhash[0]); ++i) + { + unsigned n = 0; + h = labelhash[i]; + while(h) + { + struct hashentry* next = h->next; + free(h->label); + free(h); + h = next; + ++n; + } + t+=n; + if(!n) ++z; + //else printf("%u\n", n); + min = MIN(min, n); + max = MAX(max, n); + } + printf("total %u, hsize %lu, zero %u, min %u, max %u\n", t, sizeof(labelhash)/sizeof(labelhash[0]), z, min, max); + memset(labelhash, 0, sizeof(labelhash)); +} + +/* ************************* */ + + +static const char* argtype2str(argtype_t t) +{ + switch(t) + { + case T_NONE: return "none"; + case T_REGISTER: return "register"; + case T_IMMEDIATE: return "immediate"; + case T_MEMORY: return "memory"; + case T_LABEL: return "label"; + default: crap("invalid type"); + } + /* not reached */ + return T_NONE; +} + +/* ************************* */ + +static inline int iss8(u64 v) +{ + return ((long)v >= -0x80 && (long)v <= 0x7f); /* signed 8bit range is -128..127; 128 would be emitted as disp8 0x80 == -128 */ +} + +static inline int isu8(u64 v) +{ + return (v <= 0xff); +} + +static inline int iss16(u64 v) +{ + return ((long)v >= -0x8000 && (long)v <= 0x7fff); /* signed 16bit range is -32768..32767 */ +} + +static inline int isu16(u64 v) +{ + return (v <= 0xffff); +} + +static inline int iss32(u64 v) +{ + return (labs(v) <= 0x80000000); +} + +static inline int isu32(u64 v) +{
+ return (v <= 0xffffffff); +} + +static void emit_opsingle(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 op = (u8)((unsigned long) data); + + if(arg1.type != T_NONE || arg2.type != T_NONE) + CRAP_INVALID_ARGS; + + emit1(op); +} + +static void emit_opsingle16(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + emit1(0x66); + emit_opsingle(mnemonic, arg1, arg2, data); +} + +static void compute_rexmodrmsib(u8* rex_r, u8* modrm_r, u8* sib_r, arg_t* arg1, arg_t* arg2) +{ + u8 rex = 0; + u8 modrm = 0; + u8 sib = 0; + + if((arg1->type == T_REGISTER && arg2->type == T_REGISTER) + && ((arg1->v.reg & R_MSZ) != (arg2->v.reg & R_MSZ)) + && !((arg1->v.reg & R_XMM) || (arg2->v.reg & R_XMM))) + crap("both registers must be of same width"); + + if((arg1->type == T_REGISTER && arg1->v.reg & R_64) + || (arg2->type == T_REGISTER && arg2->v.reg & R_64)) + { + rex |= REX_W; + } + + if(arg1->type == T_REGISTER) + { + if((arg1->v.reg & R_MGP) > 0x07) + rex |= REX_R; + + modrm |= (arg1->v.reg & 0x07) << 3; + } + + if(arg2->type == T_REGISTER) + { + if((arg2->v.reg & R_MGP) > 0x07) + rex |= REX_B; + + modrm |= (arg2->v.reg & 0x07); + } + + if(arg2->type == T_MEMORY) + { + if((arg2->v.mem.basetype == T_REGISTER && !(arg2->v.mem.base.reg & R_64)) + || (arg2->v.mem.indextype == T_REGISTER && !(arg2->v.mem.index.reg & R_64))) + { + crap("only 64bit base/index registers are %x %x", arg2->v.mem.base.reg, arg2->v.mem.index.reg); + } + + if(arg2->v.mem.indextype == T_REGISTER) + { + modrm |= MODRM_RM_SIB; + if(!arg2->v.mem.disp) + { + modrm |= MODRM_MOD_00; + } + else if(iss8(arg2->v.mem.disp)) + { + modrm |= MODRM_MOD_01; + } + else if(isu32(arg2->v.mem.disp)) + { + modrm |= MODRM_MOD_10; + } + else + { + crap("invalid displacement"); + } + + if((arg2->v.mem.index.reg & R_MGP) > 0x07) + rex |= REX_X; + + if((arg2->v.mem.base.reg & R_MGP) > 0x07) + rex |= REX_B; + + if(arg2->v.mem.basetype != T_REGISTER) + crap("base must be register"); + switch(arg2->v.mem.scale) + 
{ + case 1: break; + case 2: sib |= 1 << 6; break; + case 4: sib |= 2 << 6; break; + case 8: sib |= 3 << 6; break; + } + sib |= (arg2->v.mem.index.reg & 0x07) << 3; + sib |= (arg2->v.mem.base.reg & 0x07); + } + else if(arg2->v.mem.indextype == T_NONE) + { + if(!arg2->v.mem.disp) + { + modrm |= MODRM_MOD_00; + } + else if(iss8(arg2->v.mem.disp)) + { + modrm |= MODRM_MOD_01; + } + else if(isu32(arg2->v.mem.disp)) + { + modrm |= MODRM_MOD_10; + } + else + { + crap("invalid displacement"); + } + + if(arg2->v.mem.basetype != T_REGISTER) + crap("todo: base != register"); + + if((arg2->v.mem.base.reg & R_MGP) > 0x07) + rex |= REX_B; + + modrm |= arg2->v.mem.base.reg & 0x07; + } + else + { + crap("invalid indextype"); + } + } + else + { + modrm |= MODRM_MOD_11; + } + + if(rex) + rex |= 0x40; // XXX + + *rex_r = rex; + *modrm_r = modrm; + *sib_r = sib; +} + +static void maybe_emit_displacement(arg_t* arg) +{ + if(arg->type != T_MEMORY) + return; + + if(arg->v.mem.disp) + { + if(iss8(arg->v.mem.disp)) + { + emit1((u8)arg->v.mem.disp); + } + else if(isu32(arg->v.mem.disp)) + { + emit4(arg->v.mem.disp); + } + else + { + crap("invalid displacement"); + } + } +} + +/* one byte operator with register added to operator */ +static void emit_opreg(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 op = (u8)((unsigned long) data); + + if(arg1.type != T_REGISTER || arg2.type != T_NONE) + CRAP_INVALID_ARGS; + + if((arg1.v.reg & R_MGP) > 0x07) + emit1(0x40 | REX_B); + + op |= (arg1.v.reg & 0x07); + + emit1(op); +} + +/* operator which operates on reg/mem */ +static void emit_op_rm(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 rex, modrm, sib; + opparam_t* params = data; + + if((arg1.type != T_REGISTER && arg1.type != T_MEMORY) || arg2.type != T_NONE) + CRAP_INVALID_ARGS; + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); + + modrm |= params->subcode << 3; + + if(arg1.v.reg & R_16) + emit1(0x66); + + if(rex) emit1(rex); + if(arg1.v.reg & R_8) + 
emit1(params->rcode8); // op reg8/mem8, + else + emit1(params->rcode); // op reg/mem, + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg1); +} + +/* operator which operates on reg/mem with cl */ +static void emit_op_rm_cl(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 rex, modrm, sib; + opparam_t* params = data; + + if(arg2.type != T_REGISTER || arg1.type != T_REGISTER) + CRAP_INVALID_ARGS; + + if((arg1.v.reg & R_MGP) != R_ECX && !(arg1.v.reg & R_8)) + crap("only cl register is valid"); + + arg1.type = T_NONE; // don't complain, we know it's cl anyways + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + modrm |= params->subcode << 3; + + if(arg2.v.reg & R_16) + emit1(0x66); + + if(rex) emit1(rex); + if(arg2.v.reg & R_8) + emit1(params->rcode8); // op reg8/mem8, + else + emit1(params->rcode); // op reg/mem, + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg2); +} + +static void emit_mov(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 rex = 0; + u8 modrm = 0; + u8 sib = 0; + + if(arg1.type == T_IMMEDIATE && arg2.type == T_REGISTER) + { + u8 op = 0xb8; + + if(arg2.v.reg & R_8) + { + if(!isu8(arg1.v.imm)) + crap("value too large for 8bit register"); + + op = 0xb0; + } + else if(arg2.v.reg & R_16) + { + if(!isu16(arg1.v.imm)) + crap("value too large for 16bit register"); + emit1(0x66); + } + else if(!(arg2.v.reg & R_64)) /* 32bit destination; parenthesised because '!' binds tighter than '&' */ + { + if(!isu32(arg1.v.imm)) + crap("value too large for 32bit register"); + } + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + if(rex) emit1(rex); + + op |= (arg2.v.reg & 0x07); + + emit1(op); + + if(arg2.v.reg & R_8) emit1(arg1.v.imm); + else if(arg2.v.reg & R_16) emit2(arg1.v.imm); + else if(arg2.v.reg & R_64) emit8(arg1.v.imm); + else emit4(arg1.v.imm); + } + else if(arg1.type == T_IMMEDIATE && arg2.type == T_MEMORY) + { + if(!iss32(arg1.v.imm)) + { + crap("only 32bit immediates supported"); +
} + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + if(rex) emit1(rex); + emit1(0xc7); // mov reg/mem, imm + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + emit4(arg1.v.imm); + } + else if(arg1.type == T_REGISTER && arg2.type == T_REGISTER) // XXX: same as next + { + if(arg1.type != T_REGISTER || arg2.type != T_REGISTER) + crap("both args must be registers"); + + if((arg1.v.reg & R_MSZ) != (arg2.v.reg & R_MSZ)) + crap("both registers must be same width"); + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + if(rex) emit1(rex); + emit1(0x89); // mov reg reg/mem, + emit1(modrm); + } + else if(arg1.type == T_REGISTER && arg2.type == T_MEMORY) + { + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + if(arg1.v.reg & R_16) + emit1(0x66); + + if(rex) emit1(rex); + if(arg1.v.reg & R_8) + emit1(0x88); // mov reg reg/mem, + else + emit1(0x89); // mov reg reg/mem, + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg2); + } + else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER) + { + compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); + + if(arg2.v.reg & R_16) + emit1(0x66); + + if(rex) emit1(rex); + if(arg2.v.reg & R_8) + emit1(0x8a); // mov reg/mem, reg + else + emit1(0x8b); // mov reg/mem, reg + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg1); + } + else + CRAP_INVALID_ARGS; +} + +static void emit_subaddand(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 rex = 0; + u8 modrm = 0; + u8 sib = 0; + + opparam_t* params = data; + + if(arg1.type == T_IMMEDIATE && arg2.type == T_REGISTER) + { + if(!iss32(arg1.v.imm)) + { + crap("only 8 and 32 bit immediates supported"); + } + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + modrm |= params->subcode << 3; + + if(rex) emit1(rex); +#if 0 + if(isu8(arg1.v.imm)) + { + emit1(0x83); // sub reg/mem, imm8 + emit1(modrm); + emit1(arg1.v.imm&0xFF); + } + 
else +#endif + { + emit1(0x81); // sub reg/mem, imm32 + emit1(modrm); + emit4(arg1.v.imm); + } + } + else if(arg1.type == T_REGISTER && (arg2.type == T_MEMORY || arg2.type == T_REGISTER)) + { + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + if(rex) emit1(rex); + emit1(params->rmcode); // sub reg/mem, reg + emit1(modrm); + if(arg2.type == T_MEMORY && (modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg2); + } + else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER && params->mrcode) + { + compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); + + if(rex) emit1(rex); + emit1(params->mrcode); // sub reg, reg/mem + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg1); + } + else + CRAP_INVALID_ARGS; +} + +static void emit_condjump(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + unsigned off; + int disp; + unsigned char opcode = (unsigned char)(((unsigned long)data)&0xFF); + + if(arg1.type != T_LABEL || arg2.type != T_NONE) + crap("%s: argument must be label", mnemonic); + + emit1(opcode); + + off = lookup_label(arg1.v.label); + disp = off-(compiledOfs+1); + if(assembler_pass && abs(disp) > 127) + crap("cannot jump that far (%x -> %x = %x)", compiledOfs, off, disp); + + emit1(disp); +} + +static void emit_jmp(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + if((arg1.type != T_LABEL && arg1.type != T_REGISTER && arg1.type != T_MEMORY) || arg2.type != T_NONE) + CRAP_INVALID_ARGS; + + if(arg1.type == T_LABEL) + { + unsigned off; + int disp; + + off = lookup_label(arg1.v.label); + disp = off-(compiledOfs+5); + emit1(0xe9); + emit4(disp); + } + else + { + u8 rex, modrm, sib; + + if(arg1.type == T_REGISTER) + { + if(!arg1.absolute) + crap("jmp must be absolute"); + + if((arg1.v.reg & R_64) != R_64) + crap("register must be 64bit"); + + arg1.v.reg ^= R_64; // no rex required for call + } + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); + + modrm |= 0x4 
<< 3; + + if(rex) emit1(rex); + emit1(0xff); + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + maybe_emit_displacement(&arg1); + } +} + +static void emit_call(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 rex, modrm, sib; + + if(arg1.type != T_REGISTER || arg2.type != T_NONE) + CRAP_INVALID_ARGS; + + if(!arg1.absolute) + crap("call must be absolute"); + + if((arg1.v.reg & R_64) != R_64) + crap("register must be 64bit"); + + arg1.v.reg ^= R_64; // no rex required for call + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); + + modrm |= 0x2 << 3; + + if(rex) emit1(rex); + emit1(0xff); + emit1(modrm); +} + + +static void emit_twobyte(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 rex, modrm, sib; + + opparam_t* params = data; + + if(arg1.type == T_REGISTER && (arg2.type == T_MEMORY || arg2.type == T_REGISTER)) + { + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + if(params->xmmprefix) emit1(params->xmmprefix); + if(rex) emit1(rex); + emit1(0x0f); + emit1(params->rmcode); // sub reg/mem, reg + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg2); + } + else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER && params->mrcode) + { + compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); + + if(params->xmmprefix) emit1(params->xmmprefix); + if(rex) emit1(rex); + emit1(0x0f); + emit1(params->mrcode); // sub reg, reg/mem + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg1); + } + else + CRAP_INVALID_ARGS; +} + +/* subcode is the ModRM reg-field extension used with the 0x81 immediate form in emit_subaddand */ +static opparam_t params_add = { subcode: 0, rmcode: 0x01, }; +static opparam_t params_or = { subcode: 1, rmcode: 0x09, }; +static opparam_t params_and = { subcode: 4, rmcode: 0x21, }; +static opparam_t params_sub = { subcode: 5, rmcode: 0x29, }; +static opparam_t params_xor = { subcode: 6, rmcode: 0x31, }; +static opparam_t params_cmp = { subcode: 7, rmcode: 0x39, mrcode: 0x3b, }; /* cmp r/m, imm32 is 0x81 /7; /6 would encode xor */ +static 
opparam_t params_dec = { subcode: 1, rcode: 0xff, rcode8: 0xfe, }; +static opparam_t params_sar = { subcode: 7, rcode: 0xd3, rcode8: 0xd2, }; +static opparam_t params_shl = { subcode: 4, rcode: 0xd3, rcode8: 0xd2, }; +static opparam_t params_shr = { subcode: 5, rcode: 0xd3, rcode8: 0xd2, }; +static opparam_t params_idiv = { subcode: 7, rcode: 0xf7, rcode8: 0xf6, }; +static opparam_t params_div = { subcode: 6, rcode: 0xf7, rcode8: 0xf6, }; +static opparam_t params_imul = { subcode: 5, rcode: 0xf7, rcode8: 0xf6, }; +static opparam_t params_mul = { subcode: 4, rcode: 0xf7, rcode8: 0xf6, }; +static opparam_t params_neg = { subcode: 3, rcode: 0xf7, rcode8: 0xf6, }; +static opparam_t params_not = { subcode: 2, rcode: 0xf7, rcode8: 0xf6, }; + +static opparam_t params_cvtsi2ss = { xmmprefix: 0xf3, rmcode: 0x2a }; +static opparam_t params_cvttss2si = { xmmprefix: 0xf3, rmcode: 0x2c }; +static opparam_t params_addss = { xmmprefix: 0xf3, mrcode: 0x58 }; +static opparam_t params_divss = { xmmprefix: 0xf3, mrcode: 0x5e }; +static opparam_t params_movss = { xmmprefix: 0xf3, mrcode: 0x10, rmcode: 0x11 }; +static opparam_t params_mulss = { xmmprefix: 0xf3, mrcode: 0x59 }; +static opparam_t params_subss = { xmmprefix: 0xf3, mrcode: 0x5c }; +static opparam_t params_ucomiss = { mrcode: 0x2e }; + +static int ops_sorted = 0; +/* mnemonic -> emitter table; each entry is { mnemonic, emit function, emitter data }, terminated by an all-NULL entry */ +static op_t ops[] = { + { "addl", emit_subaddand, &params_add }, + { "addq", emit_subaddand, &params_add }, + { "addss", emit_twobyte, &params_addss }, + { "andl", emit_subaddand, &params_and }, + { "andq", emit_subaddand, &params_and }, + { "callq", emit_call, NULL }, + { "cbw", emit_opsingle16, (void*)0x98 }, + { "cdq", emit_opsingle, (void*)0x99 }, + { "cmpl", emit_subaddand, &params_cmp }, + { "cmpq", emit_subaddand, &params_cmp }, + { "cvtsi2ss", emit_twobyte, &params_cvtsi2ss }, + { "cvttss2si", emit_twobyte, &params_cvttss2si }, + { "cwde", emit_opsingle, (void*)0x98 }, + { "decl", emit_op_rm, &params_dec }, + { "decq", emit_op_rm, &params_dec }, + { "divl", emit_op_rm, &params_div }, + { "divq", 
emit_op_rm, &params_div }, + { "divss", emit_twobyte, &params_divss }, + { "idivl", emit_op_rm, &params_idiv }, + { "imull", emit_op_rm, &params_imul }, + { "int3", emit_opsingle, (void*)0xcc }, + { "ja", emit_condjump, (void*)0x77 }, + { "jbe", emit_condjump, (void*)0x76 }, + { "jb", emit_condjump, (void*)0x72 }, + { "je", emit_condjump, (void*)0x74 }, + { "jl", emit_condjump, (void*)0x7c }, + { "jmp", emit_jmp, NULL }, + { "jmpq", emit_jmp, NULL }, + { "jnae", emit_condjump, (void*)0x72 }, + { "jna", emit_condjump, (void*)0x76 }, + { "jnbe", emit_condjump, (void*)0x77 }, + { "jnb", emit_condjump, (void*)0x73 }, + { "jnc", emit_condjump, (void*)0x73 }, + { "jne", emit_condjump, (void*)0x75 }, + { "jnge", emit_condjump, (void*)0x7c }, + { "jng", emit_condjump, (void*)0x7e }, + { "jnle", emit_condjump, (void*)0x7f }, + { "jnl", emit_condjump, (void*)0x7d }, + { "jnz", emit_condjump, (void*)0x75 }, + { "jp", emit_condjump, (void*)0x7a }, + { "jz", emit_condjump, (void*)0x74 }, + { "movb", emit_mov, NULL }, + { "movl", emit_mov, NULL }, + { "movq", emit_mov, NULL }, + { "movss", emit_twobyte, &params_movss }, + { "movw", emit_mov, NULL }, + { "mull", emit_op_rm, &params_mul }, + { "mulss", emit_twobyte, &params_mulss }, + { "negl", emit_op_rm, &params_neg }, + { "negq", emit_op_rm, &params_neg }, + { "nop", emit_opsingle, (void*)0x90 }, + { "notl", emit_op_rm, &params_not }, + { "notq", emit_op_rm, &params_not }, + { "or", emit_subaddand, &params_or }, + { "orl", emit_subaddand, &params_or }, + { "pop", emit_opreg, (void*)0x58 }, + { "push", emit_opreg, (void*)0x50 }, + { "ret", emit_opsingle, (void*)0xc3 }, + { "sarl", emit_op_rm_cl, &params_sar }, + { "shl", emit_op_rm_cl, &params_shl }, + { "shrl", emit_op_rm_cl, &params_shr }, + { "subl", emit_subaddand, &params_sub }, + { "subq", emit_subaddand, &params_sub }, + { "subss", emit_twobyte, &params_subss }, + { "ucomiss", emit_twobyte, &params_ucomiss }, + { "xorl", emit_subaddand, &params_xor }, + { "xorq", emit_subaddand, &params_xor }, + { NULL, NULL, NULL } +}; + +static int opsort(const void* A, 
const void* B) +{ + const op_t* a = A; + const op_t* b = B; + return strcmp(a->mnemonic, b->mnemonic); +} + +/* Binary search of ops[] for mnemonic n; returns the matching entry or NULL when there is none. */ +static op_t* getop(const char* n) +{ +#if 0 + op_t* o = ops; + while(o->mnemonic) + { + if(!strcmp(o->mnemonic, n)) + return o; + ++o; + } + +#else + int m, t, b; /* signed, so 't = m - 1' can drop below 'b' and end the loop instead of wrapping */ + int r; + t = (int)(sizeof(ops)/sizeof(ops[0]))-2; /* index of the last real entry; the NULL sentinel is never passed to strcmp */ + b = 0; + + while(b <= t) + { + m = ((t-b)>>1) + b; + if((r = strcmp(ops[m].mnemonic, n)) == 0) + { + return &ops[m]; + } + else if(r < 0) + { + b = m + 1; + } + else + { + t = m - 1; + } + } +#endif + + return NULL; +} + +static reg_t parsereg(const char* str) +{ + const char* s = str; + if(*s == 'a' && s[1] == 'l' && !s[2]) + { + return R_AL; + } + else if(*s == 'a' && s[1] == 'x' && !s[2]) + { + return R_AX; + } + if(*s == 'c' && s[1] == 'l' && !s[2]) + { + return R_CL; + } + if(*s == 'x') + { + if(!strcmp(s, "xmm0")) + return R_XMM0; + } + else if(*s == 'r' && s[1]) + { + ++s; + if(s[1] == 'x') + { + switch(*s++) + { + case 'a': return R_RAX; + case 'b': return R_RBX; + case 'c': return R_RCX; + case 'd': return R_RDX; + } + } + else if(s[1] == 'i') + { + switch(*s++) + { + case 's': return R_RSI; + case 'd': return R_RDI; + } + } + else if(s[0] == 's' && s[1] == 'p' && !s[2]) + { + return R_RSP; + } + else if(*s == '8' && !s[1]) + return R_R8; + else if(*s == '9' && !s[1]) + return R_R9; + else if(*s == '1' && s[1] == '0') + return R_R10; + else if(*s == '1' && s[1] == '5') + return R_R15; + } + else if(*s == 'e' && s[1]) + { + ++s; + if(s[1] == 'x') + { + switch(*s++) + { + case 'a': return R_EAX; + case 'b': return R_EBX; + case 'c': return R_ECX; + case 'd': return R_EDX; + } + } + else if(s[1] == 'i') + { + switch(*s++) + { + case 's': return R_ESI; + case 'd': return R_EDI; + } + } + } + + crap("invalid register %s", str); + + return 0; +} + +typedef enum { + TOK_LABEL = 0x80, + TOK_INT = 0x81, + TOK_END = 0x82, + TOK_INVALID = 0x83, +} token_t; + +static unsigned char nexttok(const char** str, char* label, u64* val) +{ + const char* s = 
*str; + + if(label) *label = 0; + if(val) *val = 0; + + while(*s && *s == ' ') ++s; + + if(!*s) + { + return TOK_END; + } + else if(*s == '$' || *s == '*' || *s == '%' || *s == '-' || *s == ')' || *s == '(' || *s == ',') + { + *str = s+1; + return *s; + } + else if(*s >= 'a' && *s <= 'z') + { + size_t a = strspn(s+1, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"); + if(a+1 >= LABELLEN) + crap("label %s too long", s); + if(label) + { + strncpy(label, s, a+1); + label[a+1] = 0; + } + *str = s+a+1; + return TOK_LABEL; + } + else if(*s >= '0' && *s <= '9') + { + char* endptr = NULL; + u64 v = strtol(s, &endptr, 0); + if(endptr && (endptr-s == 0)) + crap("invalid integer %s", s); + if(val) *val = v; + *str = endptr; + return TOK_INT; + } + crap("can't parse '%s'", *str); + return TOK_INVALID; +} + +static arg_t parsearg(const char** str) +{ + arg_t arg; + const char* s = *str; + char label[20]; + u64 val; + int negative = 1; + unsigned ttype; + + arg.type = T_NONE; + arg.absolute = 0; + + while(*s && *s == ' ') ++s; + + switch(nexttok(&s, label, &val)) + { + case '$' : + ttype = nexttok(&s, NULL, &val); + if(ttype == '-') + { + negative = -1; + ttype = nexttok(&s, NULL, &val); + } + if(ttype != TOK_INT) + crap("expected integer"); + arg.type = T_IMMEDIATE; + arg.v.imm = negative * val; + break; + case '*' : + if((ttype = nexttok(&s, NULL, NULL)) != '%') + { + if(ttype == '(') + goto tok_memory; + crap("expected '%%'"); + } + arg.absolute = 1; + /* fall through */ + case '%' : + if(nexttok(&s, label, &val) != TOK_LABEL) + crap("expected label"); + arg.type = T_REGISTER; + arg.v.reg = parsereg(label); + break; + case TOK_LABEL: + arg.type = T_LABEL; + strncpy(arg.v.label, label, LABELLEN); + break; + case '-': + negative = -1; + if(nexttok(&s, NULL, &val) != TOK_INT) + crap("expected integer"); + /* fall through */ + case TOK_INT: + if(nexttok(&s, label, NULL) != '(') + crap("expected '('"); // mov to/from fixed address not supported + /* fall through 
*/ + case '(': +tok_memory: + arg.type = T_MEMORY; + arg.v.mem.indextype = T_NONE; + arg.v.mem.disp = negative * val; + ttype = nexttok(&s, label, &val); + if(ttype == '%' && nexttok(&s, label, &val) != TOK_LABEL) + { + crap("expected register"); + } + if (ttype == '%') + { + arg.v.mem.basetype = T_REGISTER; + arg.v.mem.base.reg = parsereg(label); + } + else if (ttype == TOK_INT) + { + arg.v.mem.basetype = T_IMMEDIATE; + arg.v.mem.base.imm = val; + } + if((ttype = nexttok(&s, NULL, NULL)) == ',') + { + ttype = nexttok(&s, label, &val); + if(ttype == '%' && nexttok(&s, label, &val) != TOK_LABEL) + { + crap("expected register"); + } + if (ttype == '%') + { + arg.v.mem.indextype = T_REGISTER; + arg.v.mem.index.reg = parsereg(label); + } + else if (ttype == TOK_INT) + { + crap("index must be register"); + arg.v.mem.indextype = T_IMMEDIATE; + arg.v.mem.index.imm = val; + } + if(nexttok(&s, NULL, NULL) != ',') + crap("expected ','"); + if(nexttok(&s, NULL, &val) != TOK_INT) + crap("expected integer"); + if(val != 1 && val != 2 && val != 4 && val != 8) + crap("scale must 1, 2, 4 or 8"); + arg.v.mem.scale = val; + + ttype = nexttok(&s, NULL, NULL); + } + if(ttype != ')') + { + crap("expected ')' or ','"); + } + break; + default: + crap("invalid token %hhu in %s", *(unsigned char*)s, *str); + break; + } + + *str = s; + + return arg; +} + +/* ************************* */ + +void assembler_init(int pass) +{ + compiledOfs = 0; + assembler_pass = pass; + if(!pass) + { + labelhash_free(); + cur_line = NULL; + } + if(!ops_sorted) + { + ops_sorted = 1; + qsort(ops, sizeof(ops)/sizeof(ops[0])-1, sizeof(ops[0]), opsort); + } +} + +size_t assembler_get_code_size(void) +{ + return compiledOfs; +} + +void assembler_set_output(char* buf) +{ + out = buf; +} + +void assemble_line(const char* input, size_t len) +{ + char line[4096]; + char* s; + op_t* o; + char* opn; + arg_t arg1, arg2; + + arg1.type = T_NONE; + arg2.type = T_NONE; + opn = NULL; + o = NULL; + + if(len < 1) + return; + + 
if(len >= sizeof(line)) + crap("line too long"); + + memcpy(line, input, sizeof(line)); + cur_line = input; + + if(line[len-1] == '\n') line[--len] = 0; + if(line[len-1] == ':') + { + line[--len] = 0; + if(assembler_pass) + debug("%s: 0x%x\n", line, compiledOfs); + else + hash_add_label(line, compiledOfs); + } + else + { + opn = line; + s = strchr(line, ' '); + if(s) + { + *s++ = 0; + arg1 = parsearg((const char**)&s); + if(*s) + { + if(*s != ',') + crap("expected ',', got '%c'", *s); + ++s; + arg2 = parsearg((const char**)&s); + } + } + + if(!opn) + { + crap("no operator in %s", line); + } + + o = getop(opn); + if(!o) + { + crap("cannot handle op %s", opn); + } + o->func(opn, arg1, arg2, o->data); + if(assembler_pass) + debug(" - %s%s", cur_line, cur_line[strlen(cur_line)-1]=='\n'?"":"\n"); + } +} + +#ifdef SA_STANDALONE +int main(int argc, char* argv[]) +{ + char line[4096]; + size_t len; + int pass; + FILE* file = NULL; + + if(argc < 2) + { + crap("specify file"); + } + + file = fopen(argv[1], "r"); + if(!file) + { + crap("can't open file"); + } + + if(argc > 2) + { + fout = fopen(argv[2], "w"); + if(!fout) + { + crap("can't open %s for writing", argv[2]); + } + } + + for(pass = 0; pass < 2; ++pass) + { + if(fseek(file, 0, SEEK_SET)) + crap("can't rewind file"); + + if(pass) + { + char* b = malloc(assembler_get_code_size()); + if(!b) + crap("cannot allocate memory"); + assembler_set_output(b); + } + + assembler_init(pass); + + while(fgets(line, sizeof(line), file)) + { + len = strlen(line); + if(!len) continue; + + assemble_line(line, len); + } + } + + assembler_init(0); + + fclose(file); + + return 0; +} +#endif diff --git a/src/renderer/tr_image.c b/src/renderer/tr_image.c index bd51ff2d..803f86e2 100644 --- a/src/renderer/tr_image.c +++ b/src/renderer/tr_image.c @@ -34,10 +34,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #define JPEG_INTERNALS #include "../jpeg-6/jpeglib.h" +#include "../qcommon/puff.h" + static void LoadBMP( 
const char *name, byte **pic, int *width, int *height ); static void LoadTGA( const char *name, byte **pic, int *width, int *height ); static void LoadJPG( const char *name, byte **pic, int *width, int *height ); +static void LoadPNG( const char *name, byte **pic, int *width, int *height ); static byte s_intensitytable[256]; static unsigned char s_gammatable[256]; @@ -1932,103 +1935,2554 @@ int SaveJPGToBuffer( byte *buffer, int quality, /* ================= -R_LoadImage - -Loads any of the supported image types into a cannonical -32 bit format. +PNG LOADING ================= */ -void R_LoadImage( const char *name, byte **pic, int *width, int *height ) { - int len; - *pic = NULL; - *width = 0; - *height = 0; +/* + * Quake 3 image format : RGBA + */ - len = strlen(name); - if (len<5) { - return; - } +#define Q3IMAGE_BYTESPERPIXEL (4) - if ( !Q_stricmp( name+len-4, ".tga" ) ) { - LoadTGA( name, pic, width, height ); // try tga first - if (!*pic) { // - char altname[MAX_QPATH]; // try jpg in place of tga - strcpy( altname, name ); - len = strlen( altname ); - altname[len-3] = 'j'; - altname[len-2] = 'p'; - altname[len-1] = 'g'; - LoadJPG( altname, pic, width, height ); - } - } else if ( !Q_stricmp(name+len-4, ".pcx") ) { - LoadPCX32( name, pic, width, height ); - } else if ( !Q_stricmp( name+len-4, ".bmp" ) ) { - LoadBMP( name, pic, width, height ); - } else if ( !Q_stricmp( name+len-4, ".jpg" ) ) { - LoadJPG( name, pic, width, height ); - } +/* + * PNG specifications + */ + +/* + * The first 8 Bytes of every PNG-File are a fixed signature + * to identify the file as a PNG. + */ + +#define PNG_Signature "\x89\x50\x4E\x47\xD\xA\x1A\xA" +#define PNG_Signature_Size (8) + +/* + * After the signature diverse chunks follow. + * A chunk consists of a header and if Length + * is bigger than 0 a body and a CRC of the body follow. 
+ */ + +struct PNG_ChunkHeader +{ + uint32_t Length; + uint32_t Type; +}; + +#define PNG_ChunkHeader_Size (8) + +typedef uint32_t PNG_ChunkCRC; + +#define PNG_ChunkCRC_Size (4) + +/* + * We use the following ChunkTypes. + * All others are ignored. + */ + +#define MAKE_CHUNKTYPE(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | ((d))) + +#define PNG_ChunkType_IHDR MAKE_CHUNKTYPE('I', 'H', 'D', 'R') +#define PNG_ChunkType_PLTE MAKE_CHUNKTYPE('P', 'L', 'T', 'E') +#define PNG_ChunkType_IDAT MAKE_CHUNKTYPE('I', 'D', 'A', 'T') +#define PNG_ChunkType_IEND MAKE_CHUNKTYPE('I', 'E', 'N', 'D') +#define PNG_ChunkType_tRNS MAKE_CHUNKTYPE('t', 'R', 'N', 'S') + +/* + * Per specification the first chunk after the signature SHALL be IHDR. + */ + +struct PNG_Chunk_IHDR +{ + uint32_t Width; + uint32_t Height; + uint8_t BitDepth; + uint8_t ColourType; + uint8_t CompressionMethod; + uint8_t FilterMethod; + uint8_t InterlaceMethod; +}; + +#define PNG_Chunk_IHDR_Size (13) + +/* + * ColourTypes + */ + +#define PNG_ColourType_Grey (0) +#define PNG_ColourType_True (2) +#define PNG_ColourType_Indexed (3) +#define PNG_ColourType_GreyAlpha (4) +#define PNG_ColourType_TrueAlpha (6) + +/* + * number of colour components + * + * Grey : 1 grey + * True : 1 R, 1 G, 1 B + * Indexed : 1 index + * GreyAlpha : 1 grey, 1 alpha + * TrueAlpha : 1 R, 1 G, 1 B, 1 alpha + */ + +#define PNG_NumColourComponents_Grey (1) +#define PNG_NumColourComponents_True (3) +#define PNG_NumColourComponents_Indexed (1) +#define PNG_NumColourComponents_GreyAlpha (2) +#define PNG_NumColourComponents_TrueAlpha (4) + +/* + * For the different ColourTypes + * different BitDepths are specified. + */ + +#define PNG_BitDepth_1 ( 1) +#define PNG_BitDepth_2 ( 2) +#define PNG_BitDepth_4 ( 4) +#define PNG_BitDepth_8 ( 8) +#define PNG_BitDepth_16 (16) + +/* + * Only one valid CompressionMethod is standardized. + */ + +#define PNG_CompressionMethod_0 (0) + +/* + * Only one valid FilterMethod is currently standardized. 
+ */ + +#define PNG_FilterMethod_0 (0) + +/* + * This FilterMethod defines 5 FilterTypes + */ + +#define PNG_FilterType_None (0) +#define PNG_FilterType_Sub (1) +#define PNG_FilterType_Up (2) +#define PNG_FilterType_Average (3) +#define PNG_FilterType_Paeth (4) + +/* + * Two InterlaceMethods are standardized : + * 0 - NonInterlaced + * 1 - Interlaced + */ + +#define PNG_InterlaceMethod_NonInterlaced (0) +#define PNG_InterlaceMethod_Interlaced (1) + +/* + * The Adam7 interlace method uses 7 passes. + */ + +#define PNG_Adam7_NumPasses (7) + +/* + * The compressed data starts with a header ... + */ + +struct PNG_ZlibHeader +{ + uint8_t CompressionMethod; + uint8_t Flags; +}; + +#define PNG_ZlibHeader_Size (2) + +/* + * ... and is followed by a check value + */ + +#define PNG_ZlibCheckValue_Size (4) + +/* + * Some support functions for buffered files follow. + */ + +/* + * buffered file representation + */ + +struct BufferedFile +{ + byte *Buffer; + int Length; + byte *Ptr; + int BytesLeft; +}; + +/* + * Read a file into a buffer. + */ + +static struct BufferedFile *ReadBufferedFile(const char *name) +{ + struct BufferedFile *BF; + + /* + * input verification + */ + + if(!name) + { + return(NULL); + } + + /* + * Allocate control struct. + */ + + BF = ri.Malloc(sizeof(struct BufferedFile)); + if(!BF) + { + return(NULL); + } + + /* + * Initialize the structs components. + */ + + BF->Length = 0; + BF->Buffer = NULL; + BF->Ptr = NULL; + BF->BytesLeft = 0; + + /* + * Read the file. + */ + + BF->Length = ri.FS_ReadFile((char *) name, (void **) &BF->Buffer); + + /* + * Did we get it? Is it big enough? + */ + + if(!(BF->Buffer && (BF->Length > 0))) + { + ri.Free(BF); + + return(NULL); + } + + /* + * Set the pointers and counters. + */ + + BF->Ptr = BF->Buffer; + BF->BytesLeft = BF->Length; + + return(BF); } +/* + * Close a buffered file. 
+ */ + +static void CloseBufferedFile(struct BufferedFile *BF) +{ + if(BF) + { + if(BF->Buffer) + { + ri.FS_FreeFile(BF->Buffer); + } + + ri.Free(BF); + } +} /* -=============== -R_FindImageFile + * Get a pointer to the requested bytes. + */ -Finds or loads the given image. -Returns NULL if it fails, not a default image. -============== -*/ -image_t *R_FindImageFile( const char *name, qboolean mipmap, qboolean allowPicmip, int glWrapClampMode ) { - image_t *image; - int width, height; - byte *pic; - long hash; +static void *BufferedFileRead(struct BufferedFile *BF, int Length) +{ + void *RetVal; - if (!name) { - return NULL; - } + /* + * input verification + */ - hash = generateHashValue(name); + if(!(BF && Length)) + { + return(NULL); + } + + /* + * not enough bytes left + */ - // - // see if the image is already loaded - // - for (image=hashTable[hash]; image; image=image->next) { - if ( !strcmp( name, image->imgName ) ) { - // the white image can be used with any set of parms, but other mismatches are errors - if ( strcmp( name, "*white" ) ) { - if ( image->mipmap != mipmap ) { - ri.Printf( PRINT_DEVELOPER, "WARNING: reused image %s with mixed mipmap parm\n", name ); - } - if ( image->allowPicmip != allowPicmip ) { - ri.Printf( PRINT_DEVELOPER, "WARNING: reused image %s with mixed allowPicmip parm\n", name ); - } - if ( image->wrapClampMode != glWrapClampMode ) { - ri.Printf( PRINT_ALL, "WARNING: reused image %s with mixed glWrapClampMode parm\n", name ); - } - } - return image; - } - } + if(Length > BF->BytesLeft) + { + return(NULL); + } - // - // load the pic from disk - // - R_LoadImage( name, &pic, &width, &height ); - if ( pic == NULL ) { // if we dont get a successful load - char altname[MAX_QPATH]; // copy the name - int len; // - strcpy( altname, name ); // - len = strlen( altname ); // - altname[len-3] = toupper(altname[len-3]); // and try upper case extension for unix systems - altname[len-2] = toupper(altname[len-2]); // - altname[len-1] = 
toupper(altname[len-1]); // - ri.Printf( PRINT_ALL, "trying %s...\n", altname ); // - R_LoadImage( altname, &pic, &width, &height ); // - if (pic == NULL) { // if that fails - return NULL; // bail + /* + * the pointer to the requested data + */ + + RetVal = BF->Ptr; + + /* + * Raise the pointer and counter. + */ + + BF->Ptr += Length; + BF->BytesLeft -= Length; + + return(RetVal); +} + +/* + * Rewind the buffer. + */ + +static qboolean BufferedFileRewind(struct BufferedFile *BF, int Offset) +{ + int BytesRead; + + /* + * input verification + */ + + if(!BF) + { + return(qfalse); + } + + /* + * special trick to rewind to the beginning of the buffer + */ + + if(Offset == -1) + { + BF->Ptr = BF->Buffer; + BF->BytesLeft = BF->Length; + + return(qtrue); + } + + /* + * How many bytes do we have already read? + */ + + BytesRead = BF->Ptr - BF->Buffer; + + /* + * We can only rewind to the beginning of the BufferedFile. + */ + + if(Offset > BytesRead) + { + return(qfalse); + } + + /* + * lower the pointer and counter. + */ + + BF->Ptr -= Offset; + BF->BytesLeft += Offset; + + return(qtrue); +} + +/* + * Skip some bytes. + */ + +static qboolean BufferedFileSkip(struct BufferedFile *BF, int Offset) +{ + /* + * input verification + */ + + if(!BF) + { + return(qfalse); + } + + /* + * We can only skip to the end of the BufferedFile. + */ + + if(Offset > BF->BytesLeft) + { + return(qfalse); + } + + /* + * lower the pointer and counter. + */ + + BF->Ptr += Offset; + BF->BytesLeft -= Offset; + + return(qtrue); +} + +/* + * Find a chunk + */ + +static qboolean FindChunk(struct BufferedFile *BF, uint32_t ChunkType) +{ + struct PNG_ChunkHeader *CH; + + uint32_t Length; + uint32_t Type; + + /* + * input verification + */ + + if(!BF) + { + return(qfalse); + } + + /* + * cycle trough the chunks + */ + + while(qtrue) + { + /* + * Read the chunk-header. 
+ */ + + CH = BufferedFileRead(BF, PNG_ChunkHeader_Size); + if(!CH) + { + return(qfalse); + } + + /* + * Do not swap the original types + * they might be needed later. + */ + + Length = BigLong(CH->Length); + Type = BigLong(CH->Type); + + /* + * We found it! + */ + + if(Type == ChunkType) + { + /* + * Rewind to the start of the chunk. + */ + + BufferedFileRewind(BF, PNG_ChunkHeader_Size); + + break; + } + else + { + /* + * Skip the rest of the chunk. + */ + + if(Length) + { + if(!BufferedFileSkip(BF, Length + PNG_ChunkCRC_Size)) + { + return(qfalse); + } + } + } } + + return(qtrue); +} + +/* + * Decompress all IDATs + */ + +static uint32_t DecompressIDATs(struct BufferedFile *BF, uint8_t **Buffer) +{ + uint8_t *DecompressedData; + uint32_t DecompressedDataLength; + + uint8_t *CompressedData; + uint8_t *CompressedDataPtr; + uint32_t CompressedDataLength; + + struct PNG_ChunkHeader *CH; + + uint32_t Length; + uint32_t Type; + + int BytesToRewind; + + int32_t puffResult; + uint8_t *puffDest; + uint32_t puffDestLen; + uint8_t *puffSrc; + uint32_t puffSrcLen; + + /* + * input verification + */ + + if(!(BF && Buffer)) + { + return(-1); + } + + /* + * some zeroing + */ + + DecompressedData = NULL; + DecompressedDataLength = 0; + *Buffer = DecompressedData; + + CompressedData = NULL; + CompressedDataLength = 0; + + BytesToRewind = 0; + + /* + * Find the first IDAT chunk. 
+ */ + + if(!FindChunk(BF, PNG_ChunkType_IDAT)) + { + return(-1); + } + + /* + * Count the size of the uncompressed data + */ + + while(qtrue) + { + /* + * Read chunk header + */ + + CH = BufferedFileRead(BF, PNG_ChunkHeader_Size); + if(!CH) + { + /* + * Rewind to the start of this adventure + * and return unsuccessfull + */ + + BufferedFileRewind(BF, BytesToRewind); + + return(-1); + } + + /* + * Length and Type of chunk + */ + + Length = BigLong(CH->Length); + Type = BigLong(CH->Type); + + /* + * We have reached the end of the IDAT chunks + */ + + if(!(Type == PNG_ChunkType_IDAT)) + { + BufferedFileRewind(BF, PNG_ChunkHeader_Size); + + break; + } + + /* + * Add chunk header to count. + */ + + BytesToRewind += PNG_ChunkHeader_Size; + + /* + * Skip to next chunk + */ + + if(Length) + { + if(!BufferedFileSkip(BF, Length + PNG_ChunkCRC_Size)) + { + BufferedFileRewind(BF, BytesToRewind); + + return(-1); + } + + BytesToRewind += Length + PNG_ChunkCRC_Size; + CompressedDataLength += Length; + } + } + + BufferedFileRewind(BF, BytesToRewind); + + CompressedData = ri.Malloc(CompressedDataLength); + if(!CompressedData) + { + return(-1); + } + + CompressedDataPtr = CompressedData; + + /* + * Collect the compressed Data + */ + + while(qtrue) + { + /* + * Read chunk header + */ + + CH = BufferedFileRead(BF, PNG_ChunkHeader_Size); + if(!CH) + { + ri.Free(CompressedData); + + return(-1); + } + + /* + * Length and Type of chunk + */ + + Length = BigLong(CH->Length); + Type = BigLong(CH->Type); + + /* + * We have reached the end of the IDAT chunks + */ + + if(!(Type == PNG_ChunkType_IDAT)) + { + BufferedFileRewind(BF, PNG_ChunkHeader_Size); + + break; + } + + /* + * Copy the Data + */ + + if(Length) + { + uint8_t *OrigCompressedData; + + OrigCompressedData = BufferedFileRead(BF, Length); + if(!OrigCompressedData) + { + ri.Free(CompressedData); + + return(-1); + } + + if(!BufferedFileSkip(BF, PNG_ChunkCRC_Size)) + { + ri.Free(CompressedData); + + return(-1); + } + + 
memcpy(CompressedDataPtr, OrigCompressedData, Length); + CompressedDataPtr += Length; + } + } + + /* + * Let puff() calculate the decompressed data length. + */ + + puffDest = NULL; + puffDestLen = 0; + + /* + * The zlib header and checkvalue don't belong to the compressed data. + */ + + puffSrc = CompressedData + PNG_ZlibHeader_Size; + puffSrcLen = CompressedDataLength - PNG_ZlibHeader_Size - PNG_ZlibCheckValue_Size; + + /* + * first puff() to calculate the size of the uncompressed data + */ + + puffResult = puff(puffDest, &puffDestLen, puffSrc, &puffSrcLen); + if(!((puffResult == 0) && (puffDestLen > 0))) + { + ri.Free(CompressedData); + + return(-1); + } + + /* + * Allocate the buffer for the uncompressed data. + */ + + DecompressedData = ri.Malloc(puffDestLen); + if(!DecompressedData) + { + ri.Free(CompressedData); + + return(-1); + } + + /* + * Set the input again in case something was changed by the last puff() . + */ + + puffDest = DecompressedData; + puffSrc = CompressedData + PNG_ZlibHeader_Size; + puffSrcLen = CompressedDataLength - PNG_ZlibHeader_Size - PNG_ZlibCheckValue_Size; + + /* + * decompression puff() + */ + + puffResult = puff(puffDest, &puffDestLen, puffSrc, &puffSrcLen); + + /* + * The compressed data is not needed anymore. + */ + + ri.Free(CompressedData); + + /* + * Check if the last puff() was successfull. + */ + + if(!((puffResult == 0) && (puffDestLen > 0))) + { + ri.Free(DecompressedData); + + return(-1); + } + + /* + * Set the output of this function. 
+ */ + + DecompressedDataLength = puffDestLen; + *Buffer = DecompressedData; + + return(DecompressedDataLength); +} + +/* + * the Paeth predictor + */ + +static uint8_t PredictPaeth(uint8_t a, uint8_t b, uint8_t c) +{ + /* + * a == Left + * b == Up + * c == UpLeft + */ + + uint8_t Pr; + int p; + int pa, pb, pc; + + Pr = 0; + + p = ((int) a) + ((int) b) - ((int) c); + pa = abs(p - ((int) a)); + pb = abs(p - ((int) b)); + pc = abs(p - ((int) c)); + + if((pa <= pb) && (pa <= pc)) + { + Pr = a; + } + else if(pb <= pc) + { + Pr = b; + } + else + { + Pr = c; + } + + return(Pr); + +} + +/* + * Reverse the filters. + */ + +static qboolean UnfilterImage(uint8_t *DecompressedData, + uint32_t ImageHeight, + uint32_t BytesPerScanline, + uint32_t BytesPerPixel) +{ + uint8_t *DecompPtr; + uint8_t FilterType; + uint8_t *PixelLeft, *PixelUp, *PixelUpLeft; + uint32_t w, h, p; + + /* + * some zeros for the filters + */ + + uint8_t Zeros[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + + /* + * input verification + * + * ImageHeight and BytesPerScanline are not checked, + * because these can be zero in some interlace passes. + */ + + if(!(DecompressedData && BytesPerPixel)) + { + return(qfalse); + } + + + /* + * Set the pointer to the start of the decompressed Data. + */ + + DecompPtr = DecompressedData; + + /* + * Un-filtering is done in place. + */ + + /* + * Go trough all scanlines. + */ + + for(h = 0; h < ImageHeight; h++) + { + /* + * Every scanline starts with a FilterType byte. + */ + + FilterType = *DecompPtr; + DecompPtr++; + + /* + * Left pixel of the first byte in a scanline is zero. + */ + + PixelLeft = Zeros; + + /* + * Set PixelUp to previous line only if we are on the second line or above. + * + * Plus one byte for the FilterType + */ + + if(h > 0) + { + PixelUp = DecompPtr - (BytesPerScanline + 1); + } + else + { + PixelUp = Zeros; + } + + /* + * The pixel left to the first pixel of the previous scanline is zero too. 
+ */ + + PixelUpLeft = Zeros; + + /* + * Cycle trough all pixels of the scanline. + */ + + for(w = 0; w < (BytesPerScanline / BytesPerPixel); w++) + { + /* + * Cycle trough the bytes of the pixel. + */ + + for(p = 0; p < BytesPerPixel; p++) + { + switch(FilterType) + { + case PNG_FilterType_None : + { + /* + * The byte is unfiltered. + */ + + break; + } + + case PNG_FilterType_Sub : + { + DecompPtr[p] += PixelLeft[p]; + + break; + } + + case PNG_FilterType_Up : + { + DecompPtr[p] += PixelUp[p]; + + break; + } + + case PNG_FilterType_Average : + { + DecompPtr[p] += ((uint8_t) ((((uint16_t) PixelLeft[p]) + ((uint16_t) PixelUp[p])) / 2)); + + break; + } + + case PNG_FilterType_Paeth : + { + DecompPtr[p] += PredictPaeth(PixelLeft[p], PixelUp[p], PixelUpLeft[p]); + + break; + } + + default : + { + return(qfalse); + } + } + } + + PixelLeft = DecompPtr; + + /* + * We only have a upleft pixel if we are on the second line or above. + */ + + if(h > 0) + { + PixelUpLeft = DecompPtr - (BytesPerScanline + 1); + } + + /* + * Skip to the next pixel. + */ + + DecompPtr += BytesPerPixel; + + /* + * We only have a previous line if we are on the second line and above. + */ + + if(h > 0) + { + PixelUp = DecompPtr - (BytesPerScanline + 1); + } + } + } + + return(qtrue); +} + +/* + * Convert a raw input pixel to Quake 3 RGA format. 
+ */ + +static qboolean ConvertPixel(struct PNG_Chunk_IHDR *IHDR, + byte *OutPtr, + uint8_t *DecompPtr, + qboolean HasTransparentColour, + uint8_t *TransparentColour, + uint8_t *OutPal) +{ + /* + * input verification + */ + + if(!(IHDR && OutPtr && DecompPtr && TransparentColour && OutPal)) + { + return(qfalse); + } + + switch(IHDR->ColourType) + { + case PNG_ColourType_Grey : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_1 : + case PNG_BitDepth_2 : + case PNG_BitDepth_4 : + { + uint8_t Step; + uint8_t GreyValue; + + Step = 0xFF / ((1 << IHDR->BitDepth) - 1); + + GreyValue = DecompPtr[0] * Step; + + OutPtr[0] = GreyValue; + OutPtr[1] = GreyValue; + OutPtr[2] = GreyValue; + OutPtr[3] = 0xFF; + + /* + * Grey supports full transparency for one specified colour + */ + + if(HasTransparentColour) + { + if(TransparentColour[1] == DecompPtr[0]) + { + OutPtr[3] = 0x00; + } + } + + + break; + } + + case PNG_BitDepth_8 : + case PNG_BitDepth_16 : + { + OutPtr[0] = DecompPtr[0]; + OutPtr[1] = DecompPtr[0]; + OutPtr[2] = DecompPtr[0]; + OutPtr[3] = 0xFF; + + /* + * Grey supports full transparency for one specified colour + */ + + if(HasTransparentColour) + { + if(IHDR->BitDepth == PNG_BitDepth_8) + { + if(TransparentColour[1] == DecompPtr[0]) + { + OutPtr[3] = 0x00; + } + } + else + { + if((TransparentColour[0] == DecompPtr[0]) && (TransparentColour[1] == DecompPtr[1])) + { + OutPtr[3] = 0x00; + } + } + } + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + case PNG_ColourType_True : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_8 : + { + OutPtr[0] = DecompPtr[0]; + OutPtr[1] = DecompPtr[1]; + OutPtr[2] = DecompPtr[2]; + OutPtr[3] = 0xFF; + + /* + * True supports full transparency for one specified colour + */ + + if(HasTransparentColour) + { + if((TransparentColour[1] == DecompPtr[0]) && + (TransparentColour[3] == DecompPtr[1]) && + (TransparentColour[5] == DecompPtr[3])) + { + OutPtr[3] = 0x00; + } + } + + break; + } + + case 
PNG_BitDepth_16 : + { + /* + * We use only the upper byte. + */ + + OutPtr[0] = DecompPtr[0]; + OutPtr[1] = DecompPtr[2]; + OutPtr[2] = DecompPtr[4]; + OutPtr[3] = 0xFF; + + /* + * True supports full transparency for one specified colour + */ + + if(HasTransparentColour) + { + if((TransparentColour[0] == DecompPtr[0]) && (TransparentColour[1] == DecompPtr[1]) && + (TransparentColour[2] == DecompPtr[2]) && (TransparentColour[3] == DecompPtr[3]) && + (TransparentColour[4] == DecompPtr[4]) && (TransparentColour[5] == DecompPtr[5])) + { + OutPtr[3] = 0x00; + } + } + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + case PNG_ColourType_Indexed : + { + OutPtr[0] = OutPal[DecompPtr[0] * Q3IMAGE_BYTESPERPIXEL + 0]; + OutPtr[1] = OutPal[DecompPtr[0] * Q3IMAGE_BYTESPERPIXEL + 1]; + OutPtr[2] = OutPal[DecompPtr[0] * Q3IMAGE_BYTESPERPIXEL + 2]; + OutPtr[3] = OutPal[DecompPtr[0] * Q3IMAGE_BYTESPERPIXEL + 3]; + + break; + } + + case PNG_ColourType_GreyAlpha : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_8 : + { + OutPtr[0] = DecompPtr[0]; + OutPtr[1] = DecompPtr[0]; + OutPtr[2] = DecompPtr[0]; + OutPtr[3] = DecompPtr[1]; + + break; + } + + case PNG_BitDepth_16 : + { + /* + * We use only the upper byte. + */ + + OutPtr[0] = DecompPtr[0]; + OutPtr[1] = DecompPtr[0]; + OutPtr[2] = DecompPtr[0]; + OutPtr[3] = DecompPtr[2]; + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + case PNG_ColourType_TrueAlpha : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_8 : + { + OutPtr[0] = DecompPtr[0]; + OutPtr[1] = DecompPtr[1]; + OutPtr[2] = DecompPtr[2]; + OutPtr[3] = DecompPtr[3]; + + break; + } + + case PNG_BitDepth_16 : + { + /* + * We use only the upper byte. 
+ */ + + OutPtr[0] = DecompPtr[0]; + OutPtr[1] = DecompPtr[2]; + OutPtr[2] = DecompPtr[4]; + OutPtr[3] = DecompPtr[6]; + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + default : + { + return(qfalse); + } + } + + return(qtrue); +} + + +/* + * Decode a non-interlaced image. + */ + +static qboolean DecodeImageNonInterlaced(struct PNG_Chunk_IHDR *IHDR, + byte *OutBuffer, + uint8_t *DecompressedData, + uint32_t DecompressedDataLength, + qboolean HasTransparentColour, + uint8_t *TransparentColour, + uint8_t *OutPal) +{ + uint32_t IHDR_Width; + uint32_t IHDR_Height; + uint32_t BytesPerScanline, BytesPerPixel, PixelsPerByte; + uint32_t w, h, p; + byte *OutPtr; + uint8_t *DecompPtr; + + /* + * input verification + */ + + if(!(IHDR && OutBuffer && DecompressedData && DecompressedDataLength && TransparentColour && OutPal)) + { + return(qfalse); + } + + /* + * byte swapping + */ + + IHDR_Width = BigLong(IHDR->Width); + IHDR_Height = BigLong(IHDR->Height); + + /* + * information for un-filtering + */ + + switch(IHDR->ColourType) + { + case PNG_ColourType_Grey : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_1 : + case PNG_BitDepth_2 : + case PNG_BitDepth_4 : + { + BytesPerPixel = 1; + PixelsPerByte = 8 / IHDR->BitDepth; + + break; + } + + case PNG_BitDepth_8 : + case PNG_BitDepth_16 : + { + BytesPerPixel = (IHDR->BitDepth / 8) * PNG_NumColourComponents_Grey; + PixelsPerByte = 1; + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + case PNG_ColourType_True : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_8 : + case PNG_BitDepth_16 : + { + BytesPerPixel = (IHDR->BitDepth / 8) * PNG_NumColourComponents_True; + PixelsPerByte = 1; + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + case PNG_ColourType_Indexed : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_1 : + case PNG_BitDepth_2 : + case PNG_BitDepth_4 : + { + BytesPerPixel = 1; + PixelsPerByte = 8 / IHDR->BitDepth; + + break; + } + 
+ case PNG_BitDepth_8 : + { + BytesPerPixel = PNG_NumColourComponents_Indexed; + PixelsPerByte = 1; + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + case PNG_ColourType_GreyAlpha : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_8 : + case PNG_BitDepth_16 : + { + BytesPerPixel = (IHDR->BitDepth / 8) * PNG_NumColourComponents_GreyAlpha; + PixelsPerByte = 1; + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + case PNG_ColourType_TrueAlpha : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_8 : + case PNG_BitDepth_16 : + { + BytesPerPixel = (IHDR->BitDepth / 8) * PNG_NumColourComponents_TrueAlpha; + PixelsPerByte = 1; + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + default : + { + return(qfalse); + } + } + + /* + * Calculate the size of one scanline + */ + + BytesPerScanline = (IHDR_Width * BytesPerPixel + (PixelsPerByte - 1)) / PixelsPerByte; + + /* + * Check if we have enough data for the whole image. + */ + + if(!(DecompressedDataLength == ((BytesPerScanline + 1) * IHDR_Height))) + { + return(qfalse); + } + + /* + * Unfilter the image. + */ + + if(!UnfilterImage(DecompressedData, IHDR_Height, BytesPerScanline, BytesPerPixel)) + { + return(qfalse); + } + + /* + * Set the working pointers to the beginning of the buffers. + */ + + OutPtr = OutBuffer; + DecompPtr = DecompressedData; + + /* + * Create the output image. + */ + + for(h = 0; h < IHDR_Height; h++) + { + /* + * Count the pixels on the scanline for those multipixel bytes + */ + + uint32_t CurrPixel; + + /* + * skip FilterType + */ + + DecompPtr++; + + /* + * Reset the pixel count. 
+ */ + + CurrPixel = 0; + + for(w = 0; w < (BytesPerScanline / BytesPerPixel); w++) + { + if(PixelsPerByte > 1) + { + uint8_t Mask; + uint32_t Shift; + uint8_t SinglePixel; + + for(p = 0; p < PixelsPerByte; p++) + { + if(CurrPixel < IHDR_Width) + { + Mask = (1 << IHDR->BitDepth) - 1; + Shift = (PixelsPerByte - 1 - p) * IHDR->BitDepth; + + SinglePixel = ((DecompPtr[0] & (Mask << Shift)) >> Shift); + + if(!ConvertPixel(IHDR, OutPtr, &SinglePixel, HasTransparentColour, TransparentColour, OutPal)) + { + return(qfalse); + } + + OutPtr += Q3IMAGE_BYTESPERPIXEL; + CurrPixel++; + } + } + + } + else + { + if(!ConvertPixel(IHDR, OutPtr, DecompPtr, HasTransparentColour, TransparentColour, OutPal)) + { + return(qfalse); + } + + + OutPtr += Q3IMAGE_BYTESPERPIXEL; + } + + DecompPtr += BytesPerPixel; + } + } + + return(qtrue); +} + +/* + * Decode an interlaced image. + */ + +static qboolean DecodeImageInterlaced(struct PNG_Chunk_IHDR *IHDR, + byte *OutBuffer, + uint8_t *DecompressedData, + uint32_t DecompressedDataLength, + qboolean HasTransparentColour, + uint8_t *TransparentColour, + uint8_t *OutPal) +{ + uint32_t IHDR_Width; + uint32_t IHDR_Height; + uint32_t BytesPerScanline[PNG_Adam7_NumPasses], BytesPerPixel, PixelsPerByte; + uint32_t PassWidth[PNG_Adam7_NumPasses], PassHeight[PNG_Adam7_NumPasses]; + uint32_t WSkip[PNG_Adam7_NumPasses], WOffset[PNG_Adam7_NumPasses], HSkip[PNG_Adam7_NumPasses], HOffset[PNG_Adam7_NumPasses]; + uint32_t w, h, p, a; + byte *OutPtr; + uint8_t *DecompPtr; + uint32_t TargetLength; + + /* + * input verification + */ + + if(!(IHDR && OutBuffer && DecompressedData && DecompressedDataLength && TransparentColour && OutPal)) + { + return(qfalse); + } + + /* + * byte swapping + */ + + IHDR_Width = BigLong(IHDR->Width); + IHDR_Height = BigLong(IHDR->Height); + + /* + * Skip and Offset for the passes. 
+ */ + + WSkip[0] = 8; + WOffset[0] = 0; + HSkip[0] = 8; + HOffset[0] = 0; + + WSkip[1] = 8; + WOffset[1] = 4; + HSkip[1] = 8; + HOffset[1] = 0; + + WSkip[2] = 4; + WOffset[2] = 0; + HSkip[2] = 8; + HOffset[2] = 4; + + WSkip[3] = 4; + WOffset[3] = 2; + HSkip[3] = 4; + HOffset[3] = 0; + + WSkip[4] = 2; + WOffset[4] = 0; + HSkip[4] = 4; + HOffset[4] = 2; + + WSkip[5] = 2; + WOffset[5] = 1; + HSkip[5] = 2; + HOffset[5] = 0; + + WSkip[6] = 1; + WOffset[6] = 0; + HSkip[6] = 2; + HOffset[6] = 1; + + /* + * Calculate the sizes of the passes. + */ + + PassWidth[0] = (IHDR_Width + 7) / 8; + PassHeight[0] = (IHDR_Height + 7) / 8; + + PassWidth[1] = (IHDR_Width + 3) / 8; + PassHeight[1] = (IHDR_Height + 7) / 8; + + PassWidth[2] = (IHDR_Width + 3) / 4; + PassHeight[2] = (IHDR_Height + 3) / 8; + + PassWidth[3] = (IHDR_Width + 1) / 4; + PassHeight[3] = (IHDR_Height + 3) / 4; + + PassWidth[4] = (IHDR_Width + 1) / 2; + PassHeight[4] = (IHDR_Height + 1) / 4; + + PassWidth[5] = (IHDR_Width + 0) / 2; + PassHeight[5] = (IHDR_Height + 1) / 2; + + PassWidth[6] = (IHDR_Width + 0) / 1; + PassHeight[6] = (IHDR_Height + 0) / 2; + + /* + * information for un-filtering + */ + + switch(IHDR->ColourType) + { + case PNG_ColourType_Grey : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_1 : + case PNG_BitDepth_2 : + case PNG_BitDepth_4 : + { + BytesPerPixel = 1; + PixelsPerByte = 8 / IHDR->BitDepth; + + break; + } + + case PNG_BitDepth_8 : + case PNG_BitDepth_16 : + { + BytesPerPixel = (IHDR->BitDepth / 8) * PNG_NumColourComponents_Grey; + PixelsPerByte = 1; + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + case PNG_ColourType_True : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_8 : + case PNG_BitDepth_16 : + { + BytesPerPixel = (IHDR->BitDepth / 8) * PNG_NumColourComponents_True; + PixelsPerByte = 1; + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + case PNG_ColourType_Indexed : + { + switch(IHDR->BitDepth) + { + case 
PNG_BitDepth_1 : + case PNG_BitDepth_2 : + case PNG_BitDepth_4 : + { + BytesPerPixel = 1; + PixelsPerByte = 8 / IHDR->BitDepth; + + break; + } + + case PNG_BitDepth_8 : + { + BytesPerPixel = PNG_NumColourComponents_Indexed; + PixelsPerByte = 1; + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + case PNG_ColourType_GreyAlpha : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_8 : + case PNG_BitDepth_16 : + { + BytesPerPixel = (IHDR->BitDepth / 8) * PNG_NumColourComponents_GreyAlpha; + PixelsPerByte = 1; + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + case PNG_ColourType_TrueAlpha : + { + switch(IHDR->BitDepth) + { + case PNG_BitDepth_8 : + case PNG_BitDepth_16 : + { + BytesPerPixel = (IHDR->BitDepth / 8) * PNG_NumColourComponents_TrueAlpha; + PixelsPerByte = 1; + + break; + } + + default : + { + return(qfalse); + } + } + + break; + } + + default : + { + return(qfalse); + } + } + + /* + * Calculate the size of the scanlines per pass + */ + + for(a = 0; a < PNG_Adam7_NumPasses; a++) + { + BytesPerScanline[a] = (PassWidth[a] * BytesPerPixel + (PixelsPerByte - 1)) / PixelsPerByte; + } + + /* + * Calculate the size of all passes + */ + + TargetLength = 0; + + for(a = 0; a < PNG_Adam7_NumPasses; a++) + { + TargetLength += ((BytesPerScanline[a] + (BytesPerScanline[a] ? 1 : 0)) * PassHeight[a]); + } + + /* + * Check if we have enough data for the whole image. + */ + + if(!(DecompressedDataLength == TargetLength)) + { + return(qfalse); + } + + /* + * Unfilter the image. + */ + + DecompPtr = DecompressedData; + + for(a = 0; a < PNG_Adam7_NumPasses; a++) + { + if(!UnfilterImage(DecompPtr, PassHeight[a], BytesPerScanline[a], BytesPerPixel)) + { + return(qfalse); + } + + DecompPtr += ((BytesPerScanline[a] + (BytesPerScanline[a] ? 1 : 0)) * PassHeight[a]); + } + + /* + * Set the working pointers to the beginning of the buffers. + */ + + DecompPtr = DecompressedData; + + /* + * Create the output image. 
+ */ + + for(a = 0; a < PNG_Adam7_NumPasses; a++) + { + for(h = 0; h < PassHeight[a]; h++) + { + /* + * Count the pixels on the scanline for those multipixel bytes + */ + + uint32_t CurrPixel; + + /* + * skip FilterType + */ + + DecompPtr++; + + /* + * Reset the pixel count. + */ + + CurrPixel = 0; + + for(w = 0; w < (BytesPerScanline[a] / BytesPerPixel); w++) + { + if(PixelsPerByte > 1) + { + uint8_t Mask; + uint32_t Shift; + uint8_t SinglePixel; + + for(p = 0; p < PixelsPerByte; p++) + { + if(CurrPixel < PassWidth[a]) + { + Mask = (1 << IHDR->BitDepth) - 1; + Shift = (PixelsPerByte - 1 - p) * IHDR->BitDepth; + + SinglePixel = ((DecompPtr[0] & (Mask << Shift)) >> Shift); + + OutPtr = OutBuffer + (((((h * HSkip[a]) + HOffset[a]) * IHDR_Width) + ((CurrPixel * WSkip[a]) + WOffset[a])) * Q3IMAGE_BYTESPERPIXEL); + + if(!ConvertPixel(IHDR, OutPtr, &SinglePixel, HasTransparentColour, TransparentColour, OutPal)) + { + return(qfalse); + } + + CurrPixel++; + } + } + + } + else + { + OutPtr = OutBuffer + (((((h * HSkip[a]) + HOffset[a]) * IHDR_Width) + ((w * WSkip[a]) + WOffset[a])) * Q3IMAGE_BYTESPERPIXEL); + + if(!ConvertPixel(IHDR, OutPtr, DecompPtr, HasTransparentColour, TransparentColour, OutPal)) + { + return(qfalse); + } + } + + DecompPtr += BytesPerPixel; + } + } + } + + return(qtrue); +} + +/* + * The PNG loader + */ + +static void LoadPNG(const char *name, byte **pic, int *width, int *height) +{ + struct BufferedFile *ThePNG; + byte *OutBuffer; + uint8_t *Signature; + struct PNG_ChunkHeader *CH; + uint32_t ChunkHeaderLength; + uint32_t ChunkHeaderType; + struct PNG_Chunk_IHDR *IHDR; + uint32_t IHDR_Width; + uint32_t IHDR_Height; + PNG_ChunkCRC *CRC; + uint8_t *InPal; + uint8_t *DecompressedData; + uint32_t DecompressedDataLength; + uint32_t i; + + /* + * palette with 256 RGBA entries + */ + + uint8_t OutPal[1024]; + + /* + * transparent colour from the tRNS chunk + */ + + qboolean HasTransparentColour = qfalse; + uint8_t TransparentColour[6] = {0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF}; + + /* + * input verification + */ + + if(!(name && pic)) + { + return; + } + + /* + * Zero out return values. + */ + + *pic = NULL; + + if(width) + { + *width = 0; + } + + if(height) + { + *height = 0; + } + + /* + * Read the file. + */ + + ThePNG = ReadBufferedFile(name); + if(!ThePNG) + { + return; + } + + /* + * Read the siganture of the file. + */ + + Signature = BufferedFileRead(ThePNG, PNG_Signature_Size); + if(!Signature) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Is it a PNG? + */ + + if(memcmp(Signature, PNG_Signature, PNG_Signature_Size)) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Read the first chunk-header. + */ + + CH = BufferedFileRead(ThePNG, PNG_ChunkHeader_Size); + if(!CH) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * PNG multi-byte types are in Big Endian + */ + + ChunkHeaderLength = BigLong(CH->Length); + ChunkHeaderType = BigLong(CH->Type); + + /* + * Check if the first chunk is an IHDR. + */ + + if(!((ChunkHeaderType == PNG_ChunkType_IHDR) && (ChunkHeaderLength == PNG_Chunk_IHDR_Size))) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Read the IHDR. + */ + + IHDR = BufferedFileRead(ThePNG, PNG_Chunk_IHDR_Size); + if(!IHDR) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Read the CRC for IHDR + */ + + CRC = BufferedFileRead(ThePNG, PNG_ChunkCRC_Size); + if(!CRC) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Here we could check the CRC if we wanted to. + */ + + /* + * multi-byte type swapping + */ + + IHDR_Width = BigLong(IHDR->Width); + IHDR_Height = BigLong(IHDR->Height); + + /* + * Check if Width and Height are valid. + */ + + if(!((IHDR_Width > 0) && (IHDR_Height > 0))) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Do we need to check if the dimensions of the image are valid for Quake3? + */ + + /* + * Check if CompressionMethod and FilterMethod are valid. 
+ */ + + if(!((IHDR->CompressionMethod == PNG_CompressionMethod_0) && (IHDR->FilterMethod == PNG_FilterMethod_0))) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Check if InterlaceMethod is valid. + */ + + if(!((IHDR->InterlaceMethod == PNG_InterlaceMethod_NonInterlaced) || (IHDR->InterlaceMethod == PNG_InterlaceMethod_Interlaced))) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Read palette for an indexed image. + */ + + if(IHDR->ColourType == PNG_ColourType_Indexed) + { + /* + * We need the palette first. + */ + + if(!FindChunk(ThePNG, PNG_ChunkType_PLTE)) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Read the chunk-header. + */ + + CH = BufferedFileRead(ThePNG, PNG_ChunkHeader_Size); + if(!CH) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * PNG multi-byte types are in Big Endian + */ + + ChunkHeaderLength = BigLong(CH->Length); + ChunkHeaderType = BigLong(CH->Type); + + /* + * Check if the chunk is an PLTE. + */ + + if(!(ChunkHeaderType == PNG_ChunkType_PLTE)) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Check if Length is divisible by 3 + */ + + if(ChunkHeaderLength % 3) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Read the raw palette data + */ + + InPal = BufferedFileRead(ThePNG, ChunkHeaderLength); + if(!InPal) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Read the CRC for the palette + */ + + CRC = BufferedFileRead(ThePNG, PNG_ChunkCRC_Size); + if(!CRC) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Set some default values. + */ + + for(i = 0; i < 256; i++) + { + OutPal[i * Q3IMAGE_BYTESPERPIXEL + 0] = 0x00; + OutPal[i * Q3IMAGE_BYTESPERPIXEL + 1] = 0x00; + OutPal[i * Q3IMAGE_BYTESPERPIXEL + 2] = 0x00; + OutPal[i * Q3IMAGE_BYTESPERPIXEL + 3] = 0xFF; + } + + /* + * Convert to the Quake3 RGBA-format. 
+ */ + + for(i = 0; i < (ChunkHeaderLength / 3); i++) + { + OutPal[i * Q3IMAGE_BYTESPERPIXEL + 0] = InPal[i*3+0]; + OutPal[i * Q3IMAGE_BYTESPERPIXEL + 1] = InPal[i*3+1]; + OutPal[i * Q3IMAGE_BYTESPERPIXEL + 2] = InPal[i*3+2]; + OutPal[i * Q3IMAGE_BYTESPERPIXEL + 3] = 0xFF; + } + } + + /* + * transparency information is sometimes stored in an tRNS chunk + */ + + /* + * Let's see if there is a tRNS chunk + */ + + if(FindChunk(ThePNG, PNG_ChunkType_tRNS)) + { + uint8_t *Trans; + + /* + * Read the chunk-header. + */ + + CH = BufferedFileRead(ThePNG, PNG_ChunkHeader_Size); + if(!CH) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * PNG multi-byte types are in Big Endian + */ + + ChunkHeaderLength = BigLong(CH->Length); + ChunkHeaderType = BigLong(CH->Type); + + /* + * Check if the chunk is an tRNS. + */ + + if(!(ChunkHeaderType == PNG_ChunkType_tRNS)) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Read the transparency information. + */ + + Trans = BufferedFileRead(ThePNG, ChunkHeaderLength); + if(!Trans) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Read the CRC. + */ + + CRC = BufferedFileRead(ThePNG, PNG_ChunkCRC_Size); + if(!CRC) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Only for Grey, True and Indexed ColourType should tRNS exist. + */ + + switch(IHDR->ColourType) + { + case PNG_ColourType_Grey : + { + if(!ChunkHeaderLength == 2) + { + CloseBufferedFile(ThePNG); + + return; + } + + HasTransparentColour = qtrue; + + /* + * Grey can have one colour which is completely transparent. + * This colour is always stored in 16 bits. + */ + + TransparentColour[0] = Trans[0]; + TransparentColour[1] = Trans[1]; + + break; + } + + case PNG_ColourType_True : + { + if(!ChunkHeaderLength == 6) + { + CloseBufferedFile(ThePNG); + + return; + } + + HasTransparentColour = qtrue; + + /* + * True can have one colour which is completely transparent. + * This colour is always stored in 16 bits. 
+ */ + + TransparentColour[0] = Trans[0]; + TransparentColour[1] = Trans[1]; + TransparentColour[2] = Trans[2]; + TransparentColour[3] = Trans[3]; + TransparentColour[4] = Trans[4]; + TransparentColour[5] = Trans[5]; + + break; + } + + case PNG_ColourType_Indexed : + { + /* + * Maximum of 256 one byte transparency entries. + */ + + if(ChunkHeaderLength > 256) + { + CloseBufferedFile(ThePNG); + + return; + } + + HasTransparentColour = qtrue; + + /* + * alpha values for palette entries + */ + + for(i = 0; i < ChunkHeaderLength; i++) + { + OutPal[i * Q3IMAGE_BYTESPERPIXEL + 3] = Trans[i]; + } + + break; + } + + /* + * All other ColourTypes should not have tRNS chunks + */ + + default : + { + CloseBufferedFile(ThePNG); + + return; + } + } + } + + /* + * Rewind to the start of the file. + */ + + if(!BufferedFileRewind(ThePNG, -1)) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Skip the signature + */ + + if(!BufferedFileSkip(ThePNG, PNG_Signature_Size)) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Decompress all IDAT chunks + */ + + DecompressedDataLength = DecompressIDATs(ThePNG, &DecompressedData); + if(!(DecompressedDataLength && DecompressedData)) + { + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Allocate output buffer. + */ + + OutBuffer = ri.Malloc(IHDR_Width * IHDR_Height * Q3IMAGE_BYTESPERPIXEL); + if(!OutBuffer) + { + ri.Free(DecompressedData); + CloseBufferedFile(ThePNG); + + return; + } + + /* + * Interlaced and Non-interlaced images need to be handled differently. 
+ */ + + switch(IHDR->InterlaceMethod) + { + case PNG_InterlaceMethod_NonInterlaced : + { + if(!DecodeImageNonInterlaced(IHDR, OutBuffer, DecompressedData, DecompressedDataLength, HasTransparentColour, TransparentColour, OutPal)) + { + ri.Free(OutBuffer); + ri.Free(DecompressedData); + CloseBufferedFile(ThePNG); + + return; + } + + break; + } + + case PNG_InterlaceMethod_Interlaced : + { + if(!DecodeImageInterlaced(IHDR, OutBuffer, DecompressedData, DecompressedDataLength, HasTransparentColour, TransparentColour, OutPal)) + { + ri.Free(OutBuffer); + ri.Free(DecompressedData); + CloseBufferedFile(ThePNG); + + return; + } + + break; + } + + default : + { + ri.Free(OutBuffer); + ri.Free(DecompressedData); + CloseBufferedFile(ThePNG); + + return; + } + } + + /* + * update the pointer to the image data + */ + + *pic = OutBuffer; + + /* + * Fill width and height. + */ + + if(width) + { + *width = IHDR_Width; + } + + if(height) + { + *height = IHDR_Height; + } + + /* + * DecompressedData is not needed anymore. + */ + + ri.Free(DecompressedData); + + /* + * We have all data, so close the file. + */ + + CloseBufferedFile(ThePNG); +} + +//=================================================================== + +/* +================= +R_LoadImage + +Loads any of the supported image types into a cannonical +32 bit format. 
+================= +*/ +void R_LoadImage( const char *name, byte **pic, int *width, int *height ) { + int len; + + *pic = NULL; + *width = 0; + *height = 0; + + len = strlen(name); + if (len<5) { + return; + } + + if ( !Q_stricmp( name+len-4, ".tga" ) ) { + LoadTGA( name, pic, width, height ); + + // This is a hack to get around the fact that some + // baseq3 shaders refer to tga files where the images + // are actually jpgs + if (!*pic) { + // try jpg in place of tga + char altname[MAX_QPATH]; + + strcpy( altname, name ); + len = strlen( altname ); + altname[len-3] = 'j'; + altname[len-2] = 'p'; + altname[len-1] = 'g'; + + ri.Printf( PRINT_DEVELOPER, "WARNING: %s failed, trying %s\n", name, altname ); + LoadJPG( altname, pic, width, height ); + } + } + else if ( !Q_stricmp(name+len-4, ".pcx") ) + { + LoadPCX32( name, pic, width, height ); + } + else if ( !Q_stricmp( name+len-4, ".bmp" ) ) + { + LoadBMP( name, pic, width, height ); + } + else if ( !Q_stricmp( name+len-4, ".jpg" ) ) + { + LoadJPG( name, pic, width, height ); + } + else if ( !Q_stricmp( name+len-4, ".png" ) ) + { + LoadPNG( name, pic, width, height ); + } +} + + +/* +=============== +R_FindImageFile + +Finds or loads the given image. +Returns NULL if it fails, not a default image. 
+============== +*/ +image_t *R_FindImageFile( const char *name, qboolean mipmap, qboolean allowPicmip, int glWrapClampMode ) { + image_t *image; + int width, height; + byte *pic; + long hash; + + if (!name) { + return NULL; + } + + hash = generateHashValue(name); + + // + // see if the image is already loaded + // + for (image=hashTable[hash]; image; image=image->next) { + if ( !strcmp( name, image->imgName ) ) { + // the white image can be used with any set of parms, but other mismatches are errors + if ( strcmp( name, "*white" ) ) { + if ( image->mipmap != mipmap ) { + ri.Printf( PRINT_DEVELOPER, "WARNING: reused image %s with mixed mipmap parm\n", name ); + } + if ( image->allowPicmip != allowPicmip ) { + ri.Printf( PRINT_DEVELOPER, "WARNING: reused image %s with mixed allowPicmip parm\n", name ); + } + if ( image->wrapClampMode != glWrapClampMode ) { + ri.Printf( PRINT_ALL, "WARNING: reused image %s with mixed glWrapClampMode parm\n", name ); + } + } + return image; + } + } + + // + // load the pic from disk + // + R_LoadImage( name, &pic, &width, &height ); + if ( pic == NULL ) { + return NULL; } image = R_CreateImage( ( char * ) name, pic, width, height, mipmap, allowPicmip, glWrapClampMode ); diff --git a/src/renderer/tr_init.c b/src/renderer/tr_init.c index b9b676a9..cede88a6 100644 --- a/src/renderer/tr_init.c +++ b/src/renderer/tr_init.c @@ -1161,7 +1161,7 @@ void R_Init( void ) { // Swap_Init(); - if ( (int)tess.xyz & 15 ) { + if ( (intptr_t)tess.xyz & 15 ) { Com_Printf( "WARNING: tess.xyz not 16 byte aligned\n" ); } Com_Memset( tess.constantColor255, 255, sizeof( tess.constantColor255 ) ); diff --git a/src/renderer/tr_shader.c b/src/renderer/tr_shader.c index 10223a5c..dd51a974 100644 --- a/src/renderer/tr_shader.c +++ b/src/renderer/tr_shader.c @@ -2867,32 +2867,32 @@ static void ScanAndLoadShaderFiles( void ) char **shaderFiles; char *buffers[MAX_SHADER_FILES]; char *p; - int numShaders; + int numShaderFiles; int i; char *oldp, *token, *hashMem; int 
shaderTextHashTableSizes[MAX_SHADERTEXT_HASH], hash, size; long sum = 0; // scan for shader files - shaderFiles = ri.FS_ListFiles( "scripts", ".shader", &numShaders ); + shaderFiles = ri.FS_ListFiles( "scripts", ".shader", &numShaderFiles ); - if ( !shaderFiles || !numShaders ) + if ( !shaderFiles || !numShaderFiles ) { ri.Printf( PRINT_WARNING, "WARNING: no shader files found\n" ); return; } - if ( numShaders > MAX_SHADER_FILES ) { - numShaders = MAX_SHADER_FILES; + if ( numShaderFiles > MAX_SHADER_FILES ) { + numShaderFiles = MAX_SHADER_FILES; } // load and parse shader files - for ( i = 0; i < numShaders; i++ ) + for ( i = 0; i < numShaderFiles; i++ ) { char filename[MAX_QPATH]; Com_sprintf( filename, sizeof( filename ), "scripts/%s", shaderFiles[i] ); - ri.Printf( PRINT_ALL, "...loading '%s'\n", filename ); + ri.Printf( PRINT_DEVELOPER, "...loading '%s'\n", filename ); sum += ri.FS_ReadFile( filename, (void **)&buffers[i] ); if ( !buffers[i] ) { ri.Error( ERR_DROP, "Couldn't load %s", filename ); @@ -2900,16 +2900,16 @@ static void ScanAndLoadShaderFiles( void ) } // build single large buffer - s_shaderText = ri.Hunk_Alloc( sum + numShaders*2, h_low ); + s_shaderText = ri.Hunk_Alloc( sum + numShaderFiles*2, h_low ); + s_shaderText[ 0 ] = '\0'; // free in reverse order, so the temp files are all dumped - for ( i = numShaders - 1; i >= 0 ; i-- ) { - strcat( s_shaderText, "\n" ); + for ( i = numShaderFiles - 1; i >= 0 ; i-- ) { p = &s_shaderText[strlen(s_shaderText)]; strcat( s_shaderText, buffers[i] ); ri.FS_FreeFile( buffers[i] ); - buffers[i] = p; COM_Compress(p); + strcat( s_shaderText, "\n" ); } // free up memory @@ -2917,28 +2917,19 @@ static void ScanAndLoadShaderFiles( void ) Com_Memset(shaderTextHashTableSizes, 0, sizeof(shaderTextHashTableSizes)); size = 0; - // - for ( i = 0; i < numShaders; i++ ) { - // pointer to the first shader file - p = buffers[i]; - // look for label - while ( 1 ) { - token = COM_ParseExt( &p, qtrue ); - if ( token[0] == 0 ) { - 
break; - } - hash = generateHashValue(token, MAX_SHADERTEXT_HASH); - shaderTextHashTableSizes[hash]++; - size++; - SkipBracedSection(&p); - // if we passed the pointer to the next shader file - if ( i < numShaders - 1 ) { - if ( p > buffers[i+1] ) { - break; - } - } + p = s_shaderText; + // look for shader names + while ( 1 ) { + token = COM_ParseExt( &p, qtrue ); + if ( token[0] == 0 ) { + break; } + + hash = generateHashValue(token, MAX_SHADERTEXT_HASH); + shaderTextHashTableSizes[hash]++; + size++; + SkipBracedSection(&p); } size += MAX_SHADERTEXT_HASH; @@ -2951,29 +2942,20 @@ static void ScanAndLoadShaderFiles( void ) } Com_Memset(shaderTextHashTableSizes, 0, sizeof(shaderTextHashTableSizes)); - // - for ( i = 0; i < numShaders; i++ ) { - // pointer to the first shader file - p = buffers[i]; - // look for label - while ( 1 ) { - oldp = p; - token = COM_ParseExt( &p, qtrue ); - if ( token[0] == 0 ) { - break; - } - - hash = generateHashValue(token, MAX_SHADERTEXT_HASH); - shaderTextHashTable[hash][shaderTextHashTableSizes[hash]++] = oldp; - SkipBracedSection(&p); - // if we passed the pointer to the next shader file - if ( i < numShaders - 1 ) { - if ( p > buffers[i+1] ) { - break; - } - } + p = s_shaderText; + // look for shader names + while ( 1 ) { + oldp = p; + token = COM_ParseExt( &p, qtrue ); + if ( token[0] == 0 ) { + break; } + + hash = generateHashValue(token, MAX_SHADERTEXT_HASH); + shaderTextHashTable[hash][shaderTextHashTableSizes[hash]++] = oldp; + + SkipBracedSection(&p); } return; diff --git a/src/unix/linux_signals.c b/src/unix/linux_signals.c index 72c5a522..d3c44267 100644 --- a/src/unix/linux_signals.c +++ b/src/unix/linux_signals.c @@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "../renderer/tr_local.h" #endif -static qboolean signalcaught = qfalse;; +static qboolean signalcaught = qfalse; void Sys_Exit(int); // bk010104 - abstraction -- cgit