diff options
author | Tim Angus <tim@ngus.net> | 2007-08-24 00:32:53 +0000 |
---|---|---|
committer | Tim Angus <tim@ngus.net> | 2007-08-24 00:32:53 +0000 |
commit | d6cbf3366e63e4e4333e49377ee994d26b266a6c (patch) | |
tree | 18c6488dbd654cb747534fc7a1dcd240be492bdb /src/qcommon | |
parent | 9bb5d25cc24fd678660af34b9b19e21a37f09bd3 (diff) |
* Merged ioq3-r1133
+ PNG image loader
+ Non-gas dependent x86_64 VM
+ Collision optimisations
+ Slew of other bug fixes
Diffstat (limited to 'src/qcommon')
-rw-r--r-- | src/qcommon/cm_patch.c | 7 | ||||
-rw-r--r-- | src/qcommon/cm_test.c | 4 | ||||
-rw-r--r-- | src/qcommon/cm_trace.c | 5 | ||||
-rw-r--r-- | src/qcommon/common.c | 2 | ||||
-rw-r--r-- | src/qcommon/files.c | 28 | ||||
-rw-r--r-- | src/qcommon/msg.c | 4 | ||||
-rw-r--r-- | src/qcommon/puff.c | 758 | ||||
-rw-r--r-- | src/qcommon/puff.h | 43 | ||||
-rw-r--r-- | src/qcommon/q_math.c | 47 | ||||
-rw-r--r-- | src/qcommon/q_shared.h | 9 | ||||
-rw-r--r-- | src/qcommon/vm_x86.c | 2 | ||||
-rw-r--r-- | src/qcommon/vm_x86_64.c | 152 | ||||
-rw-r--r-- | src/qcommon/vm_x86_64_assembler.c | 1419 |
13 files changed, 2431 insertions, 49 deletions
diff --git a/src/qcommon/cm_patch.c b/src/qcommon/cm_patch.c index f262db9c..38b7d5cc 100644 --- a/src/qcommon/cm_patch.c +++ b/src/qcommon/cm_patch.c @@ -1154,7 +1154,7 @@ struct patchCollide_s *CM_GeneratePatchCollide( int width, int height, vec3_t *p if ( width <= 2 || height <= 2 || !points ) { Com_Error( ERR_DROP, "CM_GeneratePatchFacets: bad parameters: (%i, %i, %p)", - width, height, points ); + width, height, (void *)points ); } if ( !(width & 1) || !(height & 1) ) { @@ -1387,6 +1387,11 @@ void CM_TraceThroughPatchCollide( traceWork_t *tw, const struct patchCollide_s * static cvar_t *cv; #endif //BSPC + if ( !BoundsIntersect( tw->bounds[0], tw->bounds[1], + pc->bounds[0], pc->bounds[1] ) ) { + return; + } + if (tw->isPoint) { CM_TracePointThroughPatchCollide( tw, pc ); return; diff --git a/src/qcommon/cm_test.c b/src/qcommon/cm_test.c index 9e950603..485facc2 100644 --- a/src/qcommon/cm_test.c +++ b/src/qcommon/cm_test.c @@ -251,6 +251,10 @@ int CM_PointContents( const vec3_t p, clipHandle_t model ) { brushnum = cm.leafbrushes[leaf->firstLeafBrush+k]; b = &cm.brushes[brushnum]; + if ( !BoundsIntersectPoint( b->bounds[0], b->bounds[1], p ) ) { + continue; + } + // see if the point is in the brush for ( i = 0 ; i < b->numsides ; i++ ) { d = DotProduct( p, b->sides[i].plane->normal ); diff --git a/src/qcommon/cm_trace.c b/src/qcommon/cm_trace.c index ee9540e3..c40e1013 100644 --- a/src/qcommon/cm_trace.c +++ b/src/qcommon/cm_trace.c @@ -847,6 +847,11 @@ void CM_TraceThroughLeaf( traceWork_t *tw, cLeaf_t *leaf ) { b->collided = qfalse; + if ( !BoundsIntersect( tw->bounds[0], tw->bounds[1], + b->bounds[0], b->bounds[1] ) ) { + continue; + } + CM_TraceThroughBrush( tw, b ); if ( !tw->trace.fraction ) { tw->trace.lateralFraction = 0.0f; diff --git a/src/qcommon/common.c b/src/qcommon/common.c index cdfe3386..be4eafd1 100644 --- a/src/qcommon/common.c +++ b/src/qcommon/common.c @@ -1257,7 +1257,7 @@ void Com_Meminfo_f( void ) { for (block = mainzone->blocklist.next ; ; block = block->next) { if ( Cmd_Argc() != 1 ) { Com_Printf ("block:%p size:%7i tag:%3i\n", - block, block->size, block->tag); + (void *)block, block->size, block->tag); } if ( block->tag ) { zoneBytes += block->size; diff --git a/src/qcommon/files.c b/src/qcommon/files.c index cd369908..c1f6fb2d 100644 --- a/src/qcommon/files.c +++ b/src/qcommon/files.c @@ -975,22 +975,6 @@ qboolean FS_FilenameCompare( const char *s1, const char *s2 ) { /* =========== -FS_ShiftedStrStr -=========== -*/ -char *FS_ShiftedStrStr(const char *string, const char *substring, int shift) { - char buf[MAX_STRING_TOKENS]; - int i; - - for (i = 0; substring[i]; i++) { - buf[i] = substring[i] + shift; - } - buf[i] = '\0'; - return strstr(string, buf); -} - -/* -=========== FS_FOpenFileRead Finds the file in the search path. @@ -1117,19 +1101,13 @@ int FS_FOpenFileRead( const char *filename, fileHandle_t *file, qboolean uniqueF } } - // game.qvm - 13 - // ZT`X!di` - if (!(pak->referenced & FS_QAGAME_REF) && FS_ShiftedStrStr(filename, "ZT`X!di`", 13)) { + if (!(pak->referenced & FS_QAGAME_REF) && strstr(filename, "game.qvm")) { pak->referenced |= FS_QAGAME_REF; } - // cgame.qvm - 7 - // \`Zf^'jof - if (!(pak->referenced & FS_CGAME_REF) && FS_ShiftedStrStr(filename , "\\`Zf^'jof", 7)) { + if (!(pak->referenced & FS_CGAME_REF) && strstr(filename, "cgame.qvm")) { pak->referenced |= FS_CGAME_REF; } - // ui.qvm - 5 - // pd)lqh - if (!(pak->referenced & FS_UI_REF) && FS_ShiftedStrStr(filename , "pd)lqh", 5)) { + if (!(pak->referenced & FS_UI_REF) && strstr(filename, "ui.qvm")) { pak->referenced |= FS_UI_REF; } diff --git a/src/qcommon/msg.c b/src/qcommon/msg.c index 46019809..d46c2a3b 100644 --- a/src/qcommon/msg.c +++ b/src/qcommon/msg.c @@ -791,7 +791,7 @@ typedef struct { } netField_t; // using the stringizing operator to save typing... -#define NETF(x) #x,(int)&((entityState_t*)0)->x +#define NETF(x) #x,(size_t)&((entityState_t*)0)->x netField_t entityStateFields[] = { @@ -1106,7 +1106,7 @@ plyer_state_t communication */ // using the stringizing operator to save typing... -#define PSF(x) #x,(int)&((playerState_t*)0)->x +#define PSF(x) #x,(size_t)&((playerState_t*)0)->x netField_t playerStateFields[] = { diff --git a/src/qcommon/puff.c b/src/qcommon/puff.c new file mode 100644 index 00000000..721854d8 --- /dev/null +++ b/src/qcommon/puff.c @@ -0,0 +1,758 @@ +/* + * This is a modified version of Mark Adlers work, + * see below for the original copyright. + * 2006 - Joerg Dietrich <dietrich_joerg@gmx.de> + */ + +/* + * puff.c + * Copyright (C) 2002-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in puff.h + * version 1.8, 9 Jan 2004 + * + * puff.c is a simple inflate written to be an unambiguous way to specify the + * deflate format. It is not written for speed but rather simplicity. As a + * side benefit, this code might actually be useful when small code is more + * important than speed, such as bootstrap applications. For typical deflate + * data, zlib's inflate() is about four times as fast as puff(). zlib's + * inflate compiles to around 20K on my machine, whereas puff.c compiles to + * around 4K on my machine (a PowerPC using GNU cc). If the faster decode() + * function here is used, then puff() is only twice as slow as zlib's + * inflate(). + * + * All dynamically allocated memory comes from the stack. The stack required + * is less than 2K bytes. This code is compatible with 16-bit int's and + * assumes that long's are at least 32 bits. puff.c uses the short data type, + * assumed to be 16 bits, for arrays in order to to conserve memory. The code + * works whether integers are stored big endian or little endian. + * + * In the comments below are "Format notes" that describe the inflate process + * and document some of the less obvious aspects of the format. This source + * code is meant to supplement RFC 1951, which formally describes the deflate + * format: + * + * http://www.zlib.org/rfc-deflate.html + */ + +/* + * Change history: + * + * 1.0 10 Feb 2002 - First version + * 1.1 17 Feb 2002 - Clarifications of some comments and notes + * - Update puff() dest and source pointers on negative + * errors to facilitate debugging deflators + * - Remove longest from struct huffman -- not needed + * - Simplify offs[] index in construct() + * - Add input size and checking, using longjmp() to + * maintain easy readability + * - Use short data type for large arrays + * - Use pointers instead of long to specify source and + * destination sizes to avoid arbitrary 4 GB limits + * 1.2 17 Mar 2002 - Add faster version of decode(), doubles speed (!), + * but leave simple version for readabilty + * - Make sure invalid distances detected if pointers + * are 16 bits + * - Fix fixed codes table error + * - Provide a scanning mode for determining size of + * uncompressed data + * 1.3 20 Mar 2002 - Go back to lengths for puff() parameters [Jean-loup] + * - Add a puff.h file for the interface + * - Add braces in puff() for else do [Jean-loup] + * - Use indexes instead of pointers for readability + * 1.4 31 Mar 2002 - Simplify construct() code set check + * - Fix some comments + * - Add FIXLCODES #define + * 1.5 6 Apr 2002 - Minor comment fixes + * 1.6 7 Aug 2002 - Minor format changes + * 1.7 3 Mar 2003 - Added test code for distribution + * - Added zlib-like license + * 1.8 9 Jan 2004 - Added some comments on no distance codes case + */ + +#include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */ +#include "puff.h" /* prototype for puff() */ + +#define local static /* for local function definitions */ + +/* + * Maximums for allocations and loops. It is not useful to change these -- + * they are fixed by the deflate format. + */ +#define MAXBITS 15 /* maximum bits in a code */ +#define MAXLCODES 286 /* maximum number of literal/length codes */ +#define MAXDCODES 30 /* maximum number of distance codes */ +#define MAXCODES (MAXLCODES+MAXDCODES) /* maximum codes lengths to read */ +#define FIXLCODES 288 /* number of fixed literal/length codes */ + +/* input and output state */ +struct state { + /* output state */ + uint8_t *out; /* output buffer */ + uint32_t outlen; /* available space at out */ + uint32_t outcnt; /* bytes written to out so far */ + + /* input state */ + uint8_t *in; /* input buffer */ + uint32_t inlen; /* available input at in */ + uint32_t incnt; /* bytes read so far */ + int32_t bitbuf; /* bit buffer */ + int32_t bitcnt; /* number of bits in bit buffer */ + + /* input limit error return state for bits() and decode() */ + jmp_buf env; +}; + +/* + * Return need bits from the input stream. This always leaves less than + * eight bits in the buffer. bits() works properly for need == 0. + * + * Format notes: + * + * - Bits are stored in bytes from the least significant bit to the most + * significant bit. Therefore bits are dropped from the bottom of the bit + * buffer, using shift right, and new bytes are appended to the top of the + * bit buffer, using shift left. + */ +local int32_t bits(struct state *s, int32_t need) +{ + int32_t val; /* bit accumulator (can use up to 20 bits) */ + + /* load at least need bits into val */ + val = s->bitbuf; + while (s->bitcnt < need) { + if (s->incnt == s->inlen) longjmp(s->env, 1); /* out of input */ + val |= (int32_t)(s->in[s->incnt++]) << s->bitcnt; /* load eight bits */ + s->bitcnt += 8; + } + + /* drop need bits and update buffer, always zero to seven bits left */ + s->bitbuf = (int32_t)(val >> need); + s->bitcnt -= need; + + /* return need bits, zeroing the bits above that */ + return (int32_t)(val & ((1L << need) - 1)); +} + +/* + * Process a stored block. + * + * Format notes: + * + * - After the two-bit stored block type (00), the stored block length and + * stored bytes are byte-aligned for fast copying. Therefore any leftover + * bits in the byte that has the last bit of the type, as many as seven, are + * discarded. The value of the discarded bits are not defined and should not + * be checked against any expectation. + * + * - The second inverted copy of the stored block length does not have to be + * checked, but it's probably a good idea to do so anyway. + * + * - A stored block can have zero length. This is sometimes used to byte-align + * subsets of the compressed data for random access or partial recovery. + */ +local int32_t stored(struct state *s) +{ + uint32_t len; /* length of stored block */ + + /* discard leftover bits from current byte (assumes s->bitcnt < 8) */ + s->bitbuf = 0; + s->bitcnt = 0; + + /* get length and check against its one's complement */ + if (s->incnt + 4 > s->inlen) return 2; /* not enough input */ + len = s->in[s->incnt++]; + len |= s->in[s->incnt++] << 8; + if (s->in[s->incnt++] != (~len & 0xff) || + s->in[s->incnt++] != ((~len >> 8) & 0xff)) + return -2; /* didn't match complement! */ + + /* copy len bytes from in to out */ + if (s->incnt + len > s->inlen) return 2; /* not enough input */ + if (s->out != NULL) { + if (s->outcnt + len > s->outlen) + return 1; /* not enough output space */ + while (len--) + s->out[s->outcnt++] = s->in[s->incnt++]; + } + else { /* just scanning */ + s->outcnt += len; + s->incnt += len; + } + + /* done with a valid stored block */ + return 0; +} + +/* + * Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of + * each length, which for a canonical code are stepped through in order. + * symbol[] are the symbol values in canonical order, where the number of + * entries is the sum of the counts in count[]. The decoding process can be + * seen in the function decode() below. + */ +struct huffman { + int16_t *count; /* number of symbols of each length */ + int16_t *symbol; /* canonically ordered symbols */ +}; + +/* + * Decode a code from the stream s using huffman table h. Return the symbol or + * a negative value if there is an error. If all of the lengths are zero, i.e. + * an empty code, or if the code is incomplete and an invalid code is received, + * then -9 is returned after reading MAXBITS bits. + * + * Format notes: + * + * - The codes as stored in the compressed data are bit-reversed relative to + * a simple integer ordering of codes of the same lengths. Hence below the + * bits are pulled from the compressed data one at a time and used to + * build the code value reversed from what is in the stream in order to + * permit simple integer comparisons for decoding. A table-based decoding + * scheme (as used in zlib) does not need to do this reversal. + * + * - The first code for the shortest length is all zeros. Subsequent codes of + * the same length are simply integer increments of the previous code. When + * moving up a length, a zero bit is appended to the code. For a complete + * code, the last code of the longest length will be all ones. + * + * - Incomplete codes are handled by this decoder, since they are permitted + * in the deflate format. See the format notes for fixed() and dynamic(). + */ +local int32_t decode(struct state *s, struct huffman *h) +{ + int32_t len; /* current number of bits in code */ + int32_t code; /* len bits being decoded */ + int32_t first; /* first code of length len */ + int32_t count; /* number of codes of length len */ + int32_t index; /* index of first code of length len in symbol table */ + int32_t bitbuf; /* bits from stream */ + int32_t left; /* bits left in next or left to process */ + int16_t *next; /* next number of codes */ + + bitbuf = s->bitbuf; + left = s->bitcnt; + code = first = index = 0; + len = 1; + next = h->count + 1; + while (1) { + while (left--) { + code |= bitbuf & 1; + bitbuf >>= 1; + count = *next++; + if (code < first + count) { /* if length len, return symbol */ + s->bitbuf = bitbuf; + s->bitcnt = (s->bitcnt - len) & 7; + return h->symbol[index + (code - first)]; + } + index += count; /* else update for next length */ + first += count; + first <<= 1; + code <<= 1; + len++; + } + left = (MAXBITS+1) - len; + if (left == 0) break; + if (s->incnt == s->inlen) longjmp(s->env, 1); /* out of input */ + bitbuf = s->in[s->incnt++]; + if (left > 8) left = 8; + } + return -9; /* ran out of codes */ +} + +/* + * Given the list of code lengths length[0..n-1] representing a canonical + * Huffman code for n symbols, construct the tables required to decode those + * codes. Those tables are the number of codes of each length, and the symbols + * sorted by length, retaining their original order within each length. The + * return value is zero for a complete code set, negative for an over- + * subscribed code set, and positive for an incomplete code set. The tables + * can be used if the return value is zero or positive, but they cannot be used + * if the return value is negative. If the return value is zero, it is not + * possible for decode() using that table to return an error--any stream of + * enough bits will resolve to a symbol. If the return value is positive, then + * it is possible for decode() using that table to return an error for received + * codes past the end of the incomplete lengths. + * + * Not used by decode(), but used for error checking, h->count[0] is the number + * of the n symbols not in the code. So n - h->count[0] is the number of + * codes. This is useful for checking for incomplete codes that have more than + * one symbol, which is an error in a dynamic block. + * + * Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS + * This is assured by the construction of the length arrays in dynamic() and + * fixed() and is not verified by construct(). + * + * Format notes: + * + * - Permitted and expected examples of incomplete codes are one of the fixed + * codes and any code with a single symbol which in deflate is coded as one + * bit instead of zero bits. See the format notes for fixed() and dynamic(). + * + * - Within a given code length, the symbols are kept in ascending order for + * the code bits definition. + */ +local int32_t construct(struct huffman *h, int16_t *length, int32_t n) +{ + int32_t symbol; /* current symbol when stepping through length[] */ + int32_t len; /* current length when stepping through h->count[] */ + int32_t left; /* number of possible codes left of current length */ + int16_t offs[MAXBITS+1]; /* offsets in symbol table for each length */ + + /* count number of codes of each length */ + for (len = 0; len <= MAXBITS; len++) + h->count[len] = 0; + for (symbol = 0; symbol < n; symbol++) + (h->count[length[symbol]])++; /* assumes lengths are within bounds */ + if (h->count[0] == n) /* no codes! */ + return 0; /* complete, but decode() will fail */ + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; /* one possible code of zero length */ + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; /* one more bit, double codes left */ + left -= h->count[len]; /* deduct count from possible codes */ + if (left < 0) return left; /* over-subscribed--return negative */ + } /* left > 0 means incomplete */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + h->count[len]; + + /* + * put symbols in table sorted by length, by symbol order within each + * length + */ + for (symbol = 0; symbol < n; symbol++) + if (length[symbol] != 0) + h->symbol[offs[length[symbol]]++] = symbol; + + /* return zero for complete set, positive for incomplete set */ + return left; +} + +/* + * Decode literal/length and distance codes until an end-of-block code. + * + * Format notes: + * + * - Compressed data that is after the block type if fixed or after the code + * description if dynamic is a combination of literals and length/distance + * pairs terminated by and end-of-block code. Literals are simply Huffman + * coded bytes. A length/distance pair is a coded length followed by a + * coded distance to represent a string that occurs earlier in the + * uncompressed data that occurs again at the current location. + * + * - Literals, lengths, and the end-of-block code are combined into a single + * code of up to 286 symbols. They are 256 literals (0..255), 29 length + * symbols (257..285), and the end-of-block symbol (256). + * + * - There are 256 possible lengths (3..258), and so 29 symbols are not enough + * to represent all of those. Lengths 3..10 and 258 are in fact represented + * by just a length symbol. Lengths 11..257 are represented as a symbol and + * some number of extra bits that are added as an integer to the base length + * of the length symbol. The number of extra bits is determined by the base + * length symbol. These are in the static arrays below, lens[] for the base + * lengths and lext[] for the corresponding number of extra bits. + * + * - The reason that 258 gets its own symbol is that the longest length is used + * often in highly redundant files. Note that 258 can also be coded as the + * base value 227 plus the maximum extra value of 31. While a good deflate + * should never do this, it is not an error, and should be decoded properly. + * + * - If a length is decoded, including its extra bits if any, then it is + * followed a distance code. There are up to 30 distance symbols. Again + * there are many more possible distances (1..32768), so extra bits are added + * to a base value represented by the symbol. The distances 1..4 get their + * own symbol, but the rest require extra bits. The base distances and + * corresponding number of extra bits are below in the static arrays dist[] + * and dext[]. + * + * - Literal bytes are simply written to the output. A length/distance pair is + * an instruction to copy previously uncompressed bytes to the output. The + * copy is from distance bytes back in the output stream, copying for length + * bytes. + * + * - Distances pointing before the beginning of the output data are not + * permitted. + * + * - Overlapped copies, where the length is greater than the distance, are + * allowed and common. For example, a distance of one and a length of 258 + * simply copies the last byte 258 times. A distance of four and a length of + * twelve copies the last four bytes three times. A simple forward copy + * ignoring whether the length is greater than the distance or not implements + * this correctly. You should not use memcpy() since its behavior is not + * defined for overlapped arrays. You should not use memmove() or bcopy() + * since though their behavior -is- defined for overlapping arrays, it is + * defined to do the wrong thing in this case. + */ +local int32_t codes(struct state *s, + struct huffman *lencode, + struct huffman *distcode) +{ + int32_t symbol; /* decoded symbol */ + int32_t len; /* length for copy */ + uint32_t dist; /* distance for copy */ + static const int16_t lens[29] = { /* Size base for length codes 257..285 */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258}; + static const int16_t lext[29] = { /* Extra bits for length codes 257..285 */ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0}; + static const int16_t dists[30] = { /* Offset base for distance codes 0..29 */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577}; + static const int16_t dext[30] = { /* Extra bits for distance codes 0..29 */ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13}; + + /* decode literals and length/distance pairs */ + do { + symbol = decode(s, lencode); + if (symbol < 0) return symbol; /* invalid symbol */ + if (symbol < 256) { /* literal: symbol is the byte */ + /* write out the literal */ + if (s->out != NULL) { + if (s->outcnt == s->outlen) return 1; + s->out[s->outcnt] = symbol; + } + s->outcnt++; + } + else if (symbol > 256) { /* length */ + /* get and compute length */ + symbol -= 257; + if (symbol >= 29) return -9; /* invalid fixed code */ + len = lens[symbol] + bits(s, lext[symbol]); + + /* get and check distance */ + symbol = decode(s, distcode); + if (symbol < 0) return symbol; /* invalid symbol */ + dist = dists[symbol] + bits(s, dext[symbol]); + if (dist > s->outcnt) + return -10; /* distance too far back */ + + /* copy length bytes from distance bytes back */ + if (s->out != NULL) { + if (s->outcnt + len > s->outlen) return 1; + while (len--) { + s->out[s->outcnt] = s->out[s->outcnt - dist]; + s->outcnt++; + } + } + else + s->outcnt += len; + } + } while (symbol != 256); /* end of block symbol */ + + /* done with a valid fixed or dynamic block */ + return 0; +} + +/* + * Process a fixed codes block. + * + * Format notes: + * + * - This block type can be useful for compressing small amounts of data for + * which the size of the code descriptions in a dynamic block exceeds the + * benefit of custom codes for that block. For fixed codes, no bits are + * spent on code descriptions. Instead the code lengths for literal/length + * codes and distance codes are fixed. The specific lengths for each symbol + * can be seen in the "for" loops below. + * + * - The literal/length code is complete, but has two symbols that are invalid + * and should result in an error if received. This cannot be implemented + * simply as an incomplete code since those two symbols are in the "middle" + * of the code. They are eight bits long and the longest literal/length\ + * code is nine bits. Therefore the code must be constructed with those + * symbols, and the invalid symbols must be detected after decoding. + * + * - The fixed distance codes also have two invalid symbols that should result + * in an error if received. Since all of the distance codes are the same + * length, this can be implemented as an incomplete code. Then the invalid + * codes are detected while decoding. + */ +local int32_t fixed(struct state *s) +{ + static int32_t virgin = 1; + static int16_t lencnt[MAXBITS+1], lensym[FIXLCODES]; + static int16_t distcnt[MAXBITS+1], distsym[MAXDCODES]; + static struct huffman lencode = {lencnt, lensym}; + static struct huffman distcode = {distcnt, distsym}; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + int32_t symbol; + int16_t lengths[FIXLCODES]; + + /* literal/length table */ + for (symbol = 0; symbol < 144; symbol++) + lengths[symbol] = 8; + for (; symbol < 256; symbol++) + lengths[symbol] = 9; + for (; symbol < 280; symbol++) + lengths[symbol] = 7; + for (; symbol < FIXLCODES; symbol++) + lengths[symbol] = 8; + construct(&lencode, lengths, FIXLCODES); + + /* distance table */ + for (symbol = 0; symbol < MAXDCODES; symbol++) + lengths[symbol] = 5; + construct(&distcode, lengths, MAXDCODES); + + /* do this just once */ + virgin = 0; + } + + /* decode data until end-of-block code */ + return codes(s, &lencode, &distcode); +} + +/* + * Process a dynamic codes block. + * + * Format notes: + * + * - A dynamic block starts with a description of the literal/length and + * distance codes for that block. New dynamic blocks allow the compressor to + * rapidly adapt to changing data with new codes optimized for that data. + * + * - The codes used by the deflate format are "canonical", which means that + * the actual bits of the codes are generated in an unambiguous way simply + * from the number of bits in each code. Therefore the code descriptions + * are simply a list of code lengths for each symbol. + * + * - The code lengths are stored in order for the symbols, so lengths are + * provided for each of the literal/length symbols, and for each of the + * distance symbols. + * + * - If a symbol is not used in the block, this is represented by a zero as + * as the code length. This does not mean a zero-length code, but rather + * that no code should be created for this symbol. There is no way in the + * deflate format to represent a zero-length code. + * + * - The maximum number of bits in a code is 15, so the possible lengths for + * any code are 1..15. + * + * - The fact that a length of zero is not permitted for a code has an + * interesting consequence. Normally if only one symbol is used for a given + * code, then in fact that code could be represented with zero bits. However + * in deflate, that code has to be at least one bit. So for example, if + * only a single distance base symbol appears in a block, then it will be + * represented by a single code of length one, in particular one 0 bit. This + * is an incomplete code, since if a 1 bit is received, it has no meaning, + * and should result in an error. So incomplete distance codes of one symbol + * should be permitted, and the receipt of invalid codes should be handled. + * + * - It is also possible to have a single literal/length code, but that code + * must be the end-of-block code, since every dynamic block has one. This + * is not the most efficient way to create an empty block (an empty fixed + * block is fewer bits), but it is allowed by the format. So incomplete + * literal/length codes of one symbol should also be permitted. + * + * - If there are only literal codes and no lengths, then there are no distance + * codes. This is represented by one distance code with zero bits. + * + * - The list of up to 286 length/literal lengths and up to 30 distance lengths + * are themselves compressed using Huffman codes and run-length encoding. In + * the list of code lengths, a 0 symbol means no code, a 1..15 symbol means + * that length, and the symbols 16, 17, and 18 are run-length instructions. + * Each of 16, 17, and 18 are follwed by extra bits to define the length of + * the run. 16 copies the last length 3 to 6 times. 17 represents 3 to 10 + * zero lengths, and 18 represents 11 to 138 zero lengths. Unused symbols + * are common, hence the special coding for zero lengths. + * + * - The symbols for 0..18 are Huffman coded, and so that code must be + * described first. This is simply a sequence of up to 19 three-bit values + * representing no code (0) or the code length for that symbol (1..7). + * + * - A dynamic block starts with three fixed-size counts from which is computed + * the number of literal/length code lengths, the number of distance code + * lengths, and the number of code length code lengths (ok, you come up with + * a better name!) in the code descriptions. For the literal/length and + * distance codes, lengths after those provided are considered zero, i.e. no + * code. The code length code lengths are received in a permuted order (see + * the order[] array below) to make a short code length code length list more + * likely. As it turns out, very short and very long codes are less likely + * to be seen in a dynamic code description, hence what may appear initially + * to be a peculiar ordering. + * + * - Given the number of literal/length code lengths (nlen) and distance code + * lengths (ndist), then they are treated as one long list of nlen + ndist + * code lengths. Therefore run-length coding can and often does cross the + * boundary between the two sets of lengths. + * + * - So to summarize, the code description at the start of a dynamic block is + * three counts for the number of code lengths for the literal/length codes, + * the distance codes, and the code length codes. This is followed by the + * code length code lengths, three bits each. This is used to construct the + * code length code which is used to read the remainder of the lengths. Then + * the literal/length code lengths and distance lengths are read as a single + * set of lengths using the code length codes. Codes are constructed from + * the resulting two sets of lengths, and then finally you can start + * decoding actual compressed data in the block. + * + * - For reference, a "typical" size for the code description in a dynamic + * block is around 80 bytes. + */ +local int32_t dynamic(struct state *s) +{ + int32_t nlen, ndist, ncode; /* number of lengths in descriptor */ + int32_t index; /* index of lengths[] */ + int32_t err; /* construct() return value */ + int16_t lengths[MAXCODES]; /* descriptor code lengths */ + int16_t lencnt[MAXBITS+1], lensym[MAXLCODES]; /* lencode memory */ + int16_t distcnt[MAXBITS+1], distsym[MAXDCODES]; /* distcode memory */ + struct huffman lencode = {lencnt, lensym}; /* length code */ + struct huffman distcode = {distcnt, distsym}; /* distance code */ + static const int16_t order[19] = /* permutation of code length codes */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /* get number of lengths in each table, check lengths */ + nlen = bits(s, 5) + 257; + ndist = bits(s, 5) + 1; + ncode = bits(s, 4) + 4; + if (nlen > MAXLCODES || ndist > MAXDCODES) + return -3; /* bad counts */ + + /* read code length code lengths (really), missing lengths are zero */ + for (index = 0; index < ncode; index++) + lengths[order[index]] = bits(s, 3); + for (; index < 19; index++) + lengths[order[index]] = 0; + + /* build huffman table for code lengths codes (use lencode temporarily) */ + err = construct(&lencode, lengths, 19); + if (err != 0) return -4; /* require complete code set here */ + + /* read length/literal and distance code length tables */ + index = 0; + while (index < nlen + ndist) { + int32_t symbol; /* decoded value */ + int32_t len; /* last length to repeat */ + + symbol = decode(s, &lencode); + if (symbol < 16) /* length in 0..15 */ + lengths[index++] = symbol; + else { /* repeat instruction */ + len = 0; /* assume repeating zeros */ + if (symbol == 16) { /* repeat last length 3..6 times */ + if (index == 0) return -5; /* no last length! */ + len = lengths[index - 1]; /* last length */ + symbol = 3 + bits(s, 2); + } + else if (symbol == 17) /* repeat zero 3..10 times */ + symbol = 3 + bits(s, 3); + else /* == 18, repeat zero 11..138 times */ + symbol = 11 + bits(s, 7); + if (index + symbol > nlen + ndist) + return -6; /* too many lengths! */ + while (symbol--) /* repeat last or zero symbol times */ + lengths[index++] = len; + } + } + + /* build huffman table for literal/length codes */ + err = construct(&lencode, lengths, nlen); + if (err < 0 || (err > 0 && nlen - lencode.count[0] != 1)) + return -7; /* only allow incomplete codes if just one code */ + + /* build huffman table for distance codes */ + err = construct(&distcode, lengths + nlen, ndist); + if (err < 0 || (err > 0 && ndist - distcode.count[0] != 1)) + return -8; /* only allow incomplete codes if just one code */ + + /* decode data until end-of-block code */ + return codes(s, &lencode, &distcode); +} + +/* + * Inflate source to dest. On return, destlen and sourcelen are updated to the + * size of the uncompressed data and the size of the deflate data respectively. + * On success, the return value of puff() is zero. If there is an error in the + * source data, i.e. it is not in the deflate format, then a negative value is + * returned. If there is not enough input available or there is not enough + * output space, then a positive error is returned. In that case, destlen and + * sourcelen are not updated to facilitate retrying from the beginning with the + * provision of more input data or more output space. In the case of invalid + * inflate data (a negative error), the dest and source pointers are updated to + * facilitate the debugging of deflators. + * + * puff() also has a mode to determine the size of the uncompressed output with + * no output written. For this dest must be (uint8_t *)0. In this case, + * the input value of *destlen is ignored, and on return *destlen is set to the + * size of the uncompressed output. + * + * The return codes are: + * + * 2: available inflate data did not terminate + * 1: output space exhausted before completing inflate + * 0: successful inflate + * -1: invalid block type (type == 3) + * -2: stored block length did not match one's complement + * -3: dynamic block code description: too many length or distance codes + * -4: dynamic block code description: code lengths codes incomplete + * -5: dynamic block code description: repeat lengths with no first length + * -6: dynamic block code description: repeat more than specified lengths + * -7: dynamic block code description: invalid literal/length code lengths + * -8: dynamic block code description: invalid distance code lengths + * -9: invalid literal/length or distance code in fixed or dynamic block + * -10: distance is too far back in fixed or dynamic block + * + * Format notes: + * + * - Three bits are read for each block to determine the kind of block and + * whether or not it is the last block. Then the block is decoded and the + * process repeated if it was not the last block. + * + * - The leftover bits in the last byte of the deflate data after the last + * block (if it was a fixed or dynamic block) are undefined and have no + * expected values to check. + */ +int32_t puff(uint8_t *dest, /* pointer to destination pointer */ + uint32_t *destlen, /* amount of output space */ + uint8_t *source, /* pointer to source data pointer */ + uint32_t *sourcelen) /* amount of input available */ +{ + struct state s; /* input/output state */ + int32_t last, type; /* block information */ + int32_t err; /* return value */ + + /* initialize output state */ + s.out = dest; + s.outlen = *destlen; /* ignored if dest is NULL */ + s.outcnt = 0; + + /* initialize input state */ + s.in = source; + s.inlen = *sourcelen; + s.incnt = 0; + s.bitbuf = 0; + s.bitcnt = 0; + + /* return if bits() or decode() tries to read past available input */ + if (setjmp(s.env) != 0) /* if came back here via longjmp() */ + err = 2; /* then skip do-loop, return error */ + else { + /* process blocks until last block or error */ + do { + last = bits(&s, 1); /* one if last block */ + type = bits(&s, 2); /* block type 0..3 */ + err = type == 0 ? stored(&s) : + (type == 1 ? fixed(&s) : + (type == 2 ? dynamic(&s) : + -1)); /* type == 3, invalid */ + if (err != 0) break; /* return with error */ + } while (!last); + } + + /* update the lengths and return */ + if (err <= 0) { + *destlen = s.outcnt; + *sourcelen = s.incnt; + } + return err; +} diff --git a/src/qcommon/puff.h b/src/qcommon/puff.h new file mode 100644 index 00000000..14070f64 --- /dev/null +++ b/src/qcommon/puff.h @@ -0,0 +1,43 @@ +/* + * This is a modified version of Mark Adlers work, + * see below for the original copyright. + * 2006 - Joerg Dietrich <dietrich_joerg@gmx.de> + */ + +/* puff.h + Copyright (C) 2002, 2003 Mark Adler, all rights reserved + version 1.7, 3 Mar 2002 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +#ifndef __PUFF_H +#define __PUFF_H + +#include "q_shared.h" /* for definitions of the <stdint.h> types */ + +/* + * See puff.c for purpose and usage. + */ +int32_t puff(uint8_t *dest, /* pointer to destination pointer */ + uint32_t *destlen, /* amount of output space */ + uint8_t *source, /* pointer to source data pointer */ + uint32_t *sourcelen); /* amount of input available */ + +#endif // __PUFF_H diff --git a/src/qcommon/q_math.c b/src/qcommon/q_math.c index 196d2f55..0973f31c 100644 --- a/src/qcommon/q_math.c +++ b/src/qcommon/q_math.c @@ -1086,6 +1086,53 @@ void AddPointToBounds( const vec3_t v, vec3_t mins, vec3_t maxs ) { } } +qboolean BoundsIntersect(const vec3_t mins, const vec3_t maxs, + const vec3_t mins2, const vec3_t maxs2) +{ + if ( maxs[0] < mins2[0] || + maxs[1] < mins2[1] || + maxs[2] < mins2[2] || + mins[0] > maxs2[0] || + mins[1] > maxs2[1] || + mins[2] > maxs2[2]) + { + return qfalse; + } + + return qtrue; +} + +qboolean BoundsIntersectSphere(const vec3_t mins, const vec3_t maxs, + const vec3_t origin, vec_t radius) +{ + if ( origin[0] - radius > maxs[0] || + origin[0] + radius < mins[0] || + origin[1] - radius > maxs[1] || + origin[1] + radius < mins[1] || + origin[2] - radius > maxs[2] || + origin[2] + radius < mins[2]) + { + return qfalse; + } + + return qtrue; +} + +qboolean BoundsIntersectPoint(const vec3_t mins, const vec3_t maxs, + const vec3_t origin) +{ + if ( origin[0] > maxs[0] || + origin[0] < mins[0] || + origin[1] > maxs[1] || + origin[1] < mins[1] || + origin[2] > maxs[2] || + origin[2] < mins[2]) + { + return qfalse; + } + + return qtrue; +} vec_t VectorNormalize( vec3_t v ) { // NOTE: TTimo - Apple G4 altivec source uses double? diff --git a/src/qcommon/q_shared.h b/src/qcommon/q_shared.h index 50a0b10c..2e3a153c 100644 --- a/src/qcommon/q_shared.h +++ b/src/qcommon/q_shared.h @@ -559,7 +559,7 @@ vec_t VectorLengthSquared( const vec3_t v ); vec_t Distance( const vec3_t p1, const vec3_t p2 ); vec_t DistanceSquared( const vec3_t p1, const vec3_t p2 ); - + void VectorNormalizeFast( vec3_t v ); void VectorInverse( vec3_t v ); @@ -593,6 +593,13 @@ void AxisCopy( vec3_t in[3], vec3_t out[3] ); void SetPlaneSignbits( struct cplane_s *out ); int BoxOnPlaneSide (vec3_t emins, vec3_t emaxs, struct cplane_s *plane); +qboolean BoundsIntersect(const vec3_t mins, const vec3_t maxs, + const vec3_t mins2, const vec3_t maxs2); +qboolean BoundsIntersectSphere(const vec3_t mins, const vec3_t maxs, + const vec3_t origin, vec_t radius); +qboolean BoundsIntersectPoint(const vec3_t mins, const vec3_t maxs, + const vec3_t origin); + float AngleMod(float a); float LerpAngle (float from, float to, float frac); float AngleSubtract( float a1, float a2 ); diff --git a/src/qcommon/vm_x86.c b/src/qcommon/vm_x86.c index c0a703bc..d298b755 100644 --- a/src/qcommon/vm_x86.c +++ b/src/qcommon/vm_x86.c @@ -213,7 +213,7 @@ void callAsmCall(void) // arbitrarily named (though this is not true for the MSC version). When a vm // makes a system call, control jumps straight to the doAsmCall label. void AsmCall( void ) { - asm( CMANG(doAsmCall) ": \n\t" \ + __asm__( CMANG(doAsmCall) ": \n\t" \ " movl (%%edi),%%eax \n\t" \ " subl $4,%%edi \n\t" \ " orl %%eax,%%eax \n\t" \ diff --git a/src/qcommon/vm_x86_64.c b/src/qcommon/vm_x86_64.c index e8e827e5..814acfef 100644 --- a/src/qcommon/vm_x86_64.c +++ b/src/qcommon/vm_x86_64.c @@ -29,9 +29,15 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include <sys/stat.h> #include <sys/types.h> #include <sys/wait.h> +#include <sys/time.h> +#include <time.h> #include <fcntl.h> #include <errno.h> #include <unistd.h> +#include <stdarg.h> + +//#define USE_GAS +//#define DEBUG_VM #ifdef DEBUG_VM #define Dfprintf(fd, args...) fprintf(fd, ##args) @@ -40,6 +46,19 @@ static FILE* qdasmout; #define Dfprintf(args...) #endif +#define VM_X86_64_MMAP + +#ifndef USE_GAS +void assembler_set_output(char* buf); +size_t assembler_get_code_size(void); +void assembler_init(int pass); +void assemble_line(const char* input, size_t len); +#ifdef Dfprintf +#undef Dfprintf +#define Dfprintf(args...) +#endif +#endif // USE_GAS + static void VM_Destroy_Compiled(vm_t* self); /* @@ -207,8 +226,29 @@ static unsigned char op_argsize[256] = [OP_BLOCK_COPY] = 4, }; +#ifdef USE_GAS #define emit(x...) \ do { fprintf(fh_s, ##x); fputc('\n', fh_s); } while(0) +#else +void emit(const char* fmt, ...) +{ + va_list ap; + char line[4096]; + va_start(ap, fmt); + vsnprintf(line, sizeof(line), fmt, ap); + va_end(ap); + assemble_line(line, strlen(line)); +} +#endif // USE_GAS + +#ifdef USE_GAS +#define JMPIARG \ + emit("jmp i_%08x", iarg); +#else +#define JMPIARG \ + emit("movq $%lu, %%rax", vm->codeBase+vm->instructionPointers[iarg]); \ + emit("jmpq *%rax"); +#endif // integer compare and jump #define IJ(op) \ @@ -216,7 +256,8 @@ static unsigned char op_argsize[256] = emit("movl 4(%%rsi), %%eax"); \ emit("cmpl 8(%%rsi), %%eax"); \ emit(op " i_%08x", instruction+1); \ - emit("jmp i_%08x", iarg); + JMPIARG \ + neednilabel = 1; #ifdef USE_X87 #define FJ(bits, op) \ @@ -226,7 +267,8 @@ static unsigned char op_argsize[256] = emit("fnstsw %%ax");\ emit("testb $" #bits ", %%ah");\ emit(op " i_%08x", instruction+1);\ - emit("jmp i_%08x", iarg); + JMPIARG \ + neednilabel = 1; #define XJ(x) #else #define FJ(x, y) @@ -236,7 +278,8 @@ static unsigned char op_argsize[256] = emit("ucomiss 8(%%rsi), %%xmm0");\ emit("jp i_%08x", instruction+1);\ emit(op " i_%08x", instruction+1);\ - emit("jmp i_%08x", iarg); + JMPIARG \ + neednilabel = 1; #endif #define SIMPLE(op) \ @@ -293,9 +336,14 @@ static unsigned char op_argsize[256] = static void* getentrypoint(vm_t* vm) { +#ifdef USE_GAS return vm->codeBase+64; // skip ELF header +#else + return vm->codeBase; +#endif // USE_GAS } +#ifdef USE_GAS char* mmapfile(const char* fn, size_t* size) { int fd = -1; @@ -383,6 +431,7 @@ static int doas(char* in, char* out, unsigned char** compiledcode) return size; } +#endif // USE_GAS static void block_copy_vm(unsigned dest, unsigned src, unsigned count) { @@ -411,8 +460,13 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { char* code; unsigned iarg = 0; unsigned char barg = 0; - void* entryPoint; + int neednilabel = 0; + struct timeval tvstart = {0, 0}; +#ifdef USE_GAS + byte* compiledcode; + int compiledsize; + void* entryPoint; char fn_s[2*MAX_QPATH]; // output file for assembler code char fn_o[2*MAX_QPATH]; // file written by as #ifdef DEBUG_VM @@ -420,16 +474,16 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { #endif FILE* fh_s; int fd_s, fd_o; - byte* compiledcode; - int compiledsize; + + gettimeofday(&tvstart, NULL); Com_Printf("compiling %s\n", vm->name); #ifdef DEBUG_VM snprintf(fn_s, sizeof(fn_s), "%.63s.s", vm->name); snprintf(fn_o, sizeof(fn_o), "%.63s.o", vm->name); - fd_s = open(fn_s, O_CREAT|O_WRONLY, 0644); - fd_o = open(fn_o, O_CREAT|O_WRONLY, 0644); + fd_s = open(fn_s, O_CREAT|O_WRONLY|O_TRUNC, 0644); + fd_o = open(fn_o, O_CREAT|O_WRONLY|O_TRUNC, 0644); #else snprintf(fn_s, sizeof(fn_s), "/tmp/%.63s.s_XXXXXX", vm->name); snprintf(fn_o, sizeof(fn_o), "/tmp/%.63s.o_XXXXXX", vm->name); @@ -463,25 +517,50 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { return; } - // translate all instructions - pc = 0; - code = (char *)header + header->codeOffset; - emit("start:"); emit("or %%r8, %%r8"); // check whether to set up instruction pointers emit("jnz main"); emit("jmp setupinstructionpointers"); emit("main:"); +#else // USE_GAS + int pass; + size_t compiledOfs = 0; + + gettimeofday(&tvstart, NULL); + + for (pass = 0; pass < 2; ++pass) { + + if(pass) + { + compiledOfs = assembler_get_code_size(); + vm->codeLength = compiledOfs; + vm->codeBase = mmap(NULL, compiledOfs, PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0); + if(vm->codeBase == (void*)-1) + Com_Error(ERR_DROP, "VM_CompileX86: can't mmap memory"); + + assembler_set_output((char*)vm->codeBase); + } + + assembler_init(pass); + +#endif // USE_GAS + + // translate all instructions + pc = 0; + code = (char *)header + header->codeOffset; for ( instruction = 0; instruction < header->instructionCount; ++instruction ) { op = code[ pc ]; ++pc; - vm->instructionPointers[instruction] = pc; +#ifndef USE_GAS + vm->instructionPointers[instruction] = assembler_get_code_size(); +#endif -#if 0 + /* store current instruction number in r15 for debugging */ +#if 1 emit("nop"); emit("movq $%d, %%r15", instruction); emit("nop"); @@ -502,7 +581,17 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { { Dfprintf(qdasmout, "%s\n", opnames[op]); } + +#ifdef USE_GAS emit("i_%08x:", instruction); +#else + if(neednilabel) + { + emit("i_%08x:", instruction); + neednilabel = 0; + } +#endif + switch ( op ) { case OP_UNDEF: @@ -561,6 +650,7 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { // emit("frstor 4(%%rsi)"); emit("addq $4, %%rsi"); emit("movl %%eax, (%%rsi)"); // store return value + neednilabel = 1; break; case OP_PUSH: emit("addq $4, %%rsi"); @@ -629,7 +719,8 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { emit("jp dojump_i_%08x", instruction); emit("jz i_%08x", instruction+1); emit("dojump_i_%08x:", instruction); - emit("jmp i_%08x", iarg); + JMPIARG + neednilabel = 1; #endif break; case OP_LTF: @@ -856,7 +947,7 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { } } - +#ifdef USE_GAS emit("setupinstructionpointers:"); emit("movq $%lu, %%rax", (unsigned long)vm->instructionPointers); for ( instruction = 0; instruction < header->instructionCount; ++instruction ) @@ -889,8 +980,17 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { vm->codeBase = compiledcode; // remember to skip ELF header! vm->codeLength = compiledsize; +#else // USE_GAS + } + assembler_init(0); + + if(mprotect(vm->codeBase, compiledOfs, PROT_READ|PROT_EXEC)) + Com_Error(ERR_DROP, "VM_CompileX86: mprotect failed"); +#endif // USE_GAS + vm->destroy = VM_Destroy_Compiled; +#ifdef USE_GAS entryPoint = getentrypoint(vm); // __asm__ __volatile__ ("int3"); @@ -911,8 +1011,6 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) { fclose(qdasmout); #endif - Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n", vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength ); - out: close(fd_o); @@ -923,12 +1021,30 @@ out: unlink(fn_s); } #endif +#endif // USE_GAS + + if(vm->compiled) + { + struct timeval tvdone = {0, 0}; + struct timeval dur = {0, 0}; + Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n", vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength ); + + gettimeofday(&tvdone, NULL); + timersub(&tvdone, &tvstart, &dur); + Com_Printf( "compilation took %lu.%06lu seconds\n", dur.tv_sec, dur.tv_usec ); + } } void VM_Destroy_Compiled(vm_t* self) { +#ifdef USE_GAS munmap(self->codeBase, self->codeLength); +#elif _WIN32 + VirtualFree(self->codeBase, self->codeLength, MEM_RELEASE); +#else + munmap(self->codeBase, self->codeLength); +#endif } /* diff --git a/src/qcommon/vm_x86_64_assembler.c b/src/qcommon/vm_x86_64_assembler.c new file mode 100644 index 00000000..1eda764f --- /dev/null +++ b/src/qcommon/vm_x86_64_assembler.c @@ -0,0 +1,1419 @@ +/* +=========================================================================== +vm_x86_64_assembler.c -- assembler for x86-64 + +Copyright (C) 2007 Ludwig Nussel <ludwig.nussel@suse.de>, Novell inc. + +Quake III Arena source code is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of the License, +or (at your option) any later version. + +Quake III Arena source code is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Quake III Arena source code; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +=========================================================================== +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long u64; + +static char* out; +static unsigned compiledOfs; +static unsigned assembler_pass; + +static const char* cur_line; + +static FILE* fout; + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +#define crap(fmt, args...) do { \ + _crap(__FUNCTION__, fmt, ##args); \ +} while(0) + +#define CRAP_INVALID_ARGS crap("invalid arguments %s, %s", argtype2str(arg1.type),argtype2str(arg2.type)); + +#ifdef DEBUG +#define debug(fmt, args...) printf(fmt, ##args) +#else +#define debug(fmt, args...) +#endif + +static void _crap(const char* func, const char* fmt, ...) +{ + va_list ap; + fprintf(stderr, "%s() - ", func); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + if(cur_line && cur_line[0]) + fprintf(stderr, "-> %s\n", cur_line); + exit(1); +} + +static void emit1(unsigned char v) +{ + if(assembler_pass) + { + out[compiledOfs++] = v; + if(fout) fwrite(&v, 1, 1, fout); + debug("%02hhx ", v); + } + else + { + ++compiledOfs; + } +} + +static inline void emit2(u16 v) +{ + emit1(v&0xFF); + emit1((v>>8)&0xFF); +} + +static inline void emit4(u32 v) +{ + emit1(v&0xFF); + emit1((v>>8)&0xFF); + emit1((v>>16)&0xFF); + emit1((v>>24)&0xFF); +} + +static inline void emit8(u64 v) +{ + emit4(v&0xFFFFFFFF); + emit4((v>>32)&0xFFFFFFFF); +} + +enum { + REX_W = 0x08, + REX_R = 0x04, + REX_X = 0x02, + REX_B = 0x01, +}; + +enum { + MODRM_MOD_00 = 0x00, + MODRM_MOD_01 = 0x01 << 6, + MODRM_MOD_10 = 0x02 << 6, + MODRM_MOD_11 = 0x03 << 6, + MODRM_RM_SIB = 0x04, +}; + +typedef enum +{ + T_NONE = 0x00, + T_REGISTER = 0x01, + T_IMMEDIATE = 0x02, + T_MEMORY = 0x04, + T_LABEL = 0x08, + T_ABSOLUTE = 0x80 +} argtype_t; + +typedef enum { + R_8 = 0x100, + R_16 = 0x200, + R_64 = 0x800, + R_MSZ = 0xF00, // size mask + R_XMM = 0x2000, // xmm register. year, sucks + R_EAX = 0x00, + R_EBX = 0x03, + R_ECX = 0x01, + R_EDX = 0x02, + R_ESI = 0x06, + R_EDI = 0x07, + R_ESP = 0x04, + R_RAX = R_EAX | R_64, + R_RBX = R_EBX | R_64, + R_RCX = R_ECX | R_64, + R_RDX = R_EDX | R_64, + R_RSI = R_ESI | R_64, + R_RDI = R_EDI | R_64, + R_RSP = R_ESP | R_64, + R_R8 = 0x08 | R_64, + R_R9 = 0x09 | R_64, + R_R10 = 0x0A | R_64, + R_R15 = 0x0F | R_64, + R_AL = R_EAX | R_8, + R_AX = R_EAX | R_16, + R_CL = R_ECX | R_8, + R_XMM0 = 0x00 | R_XMM, + R_MGP = 0x0F, // mask for general purpose registers +} reg_t; + +typedef enum { + MODRM_SIB = 0, + MODRM_NOSIB = 0x3, +} modrm_sib_t; + +typedef struct { + unsigned disp; + argtype_t basetype; + union { + u64 imm; + reg_t reg; + } base; + argtype_t indextype; + union { + u64 imm; + reg_t reg; + } index; + unsigned scale; +} memref_t; + +#define LABELLEN 32 + +typedef struct { + argtype_t type; + union { + u64 imm; + reg_t reg; + memref_t mem; + char label[LABELLEN]; + } v; + int absolute:1; +} arg_t; + +typedef void (*emitfunc)(const char* op, arg_t arg1, arg_t arg2, void* data); + +typedef struct { + char* mnemonic; + emitfunc func; + void* data; +} op_t; + +typedef struct { + u8 xmmprefix; + u8 subcode; // in modrm + u8 rmcode; // opcode for reg/mem, reg + u8 mrcode; // opcode for reg, reg/mem + u8 rcode8; // opcode for reg8/mem8 + u8 rcode; // opcode for reg/mem +} opparam_t; + +/* ************************* */ + +static unsigned hashkey(const char *string, unsigned len) { + unsigned register hash, i; + + hash = 0; + for (i = 0; i < len && string[i] != '\0'; ++i) { + hash += string[i] * (119 + i); + } + hash = (hash ^ (hash >> 10) ^ (hash >> 20)); + return hash; +} + +struct hashentry { + char* label; + unsigned address; + struct hashentry* next; +}; +static struct hashentry* labelhash[1021]; + +// no dup check! +static void hash_add_label(const char* label, unsigned address) +{ + struct hashentry* h; + unsigned i = hashkey(label, -1U); + i %= sizeof(labelhash)/sizeof(labelhash[0]); + h = malloc(sizeof(struct hashentry)); + h->label = strdup(label); + h->address = address; + h->next = labelhash[i]; + labelhash[i] = h; +} + +static unsigned lookup_label(const char* label) +{ + struct hashentry* h; + unsigned i = hashkey(label, -1U); + i %= sizeof(labelhash)/sizeof(labelhash[0]); + for(h = labelhash[i]; h; h = h->next ) + { + if(!strcmp(h->label, label)) + return h->address; + } + if(assembler_pass) + crap("label %s undefined", label); + return 0; +} + +static void labelhash_free(void) +{ + struct hashentry* h; + unsigned i; + unsigned z = 0, min = -1U, max = 0, t = 0; + for ( i = 0; i < sizeof(labelhash)/sizeof(labelhash[0]); ++i) + { + unsigned n = 0; + h = labelhash[i]; + while(h) + { + struct hashentry* next = h->next; + free(h->label); + free(h); + h = next; + ++n; + } + t+=n; + if(!n) ++z; + //else printf("%u\n", n); + min = MIN(min, n); + max = MAX(max, n); + } + printf("total %u, hsize %lu, zero %u, min %u, max %u\n", t, sizeof(labelhash)/sizeof(labelhash[0]), z, min, max); + memset(labelhash, 0, sizeof(labelhash)); +} + +/* ************************* */ + + +static const char* argtype2str(argtype_t t) +{ + switch(t) + { + case T_NONE: return "none"; + case T_REGISTER: return "register"; + case T_IMMEDIATE: return "immediate"; + case T_MEMORY: return "memory"; + case T_LABEL: return "label"; + default: crap("invalid type"); + } + /* not reached */ + return T_NONE; +} + +/* ************************* */ + +static inline int iss8(u64 v) +{ + return (labs(v) <= 0x80); +} + +static inline int isu8(u64 v) +{ + return (v <= 0xff); +} + +static inline int iss16(u64 v) +{ + return (labs(v) <= 0x8000); +} + +static inline int isu16(u64 v) +{ + return (v <= 0xffff); +} + +static inline int iss32(u64 v) +{ + return (labs(v) <= 0x80000000); +} + +static inline int isu32(u64 v) +{ + return (v <= 0xffffffff); +} + +static void emit_opsingle(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 op = (u8)((unsigned long) data); + + if(arg1.type != T_NONE || arg2.type != T_NONE) + CRAP_INVALID_ARGS; + + emit1(op); +} + +static void emit_opsingle16(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + emit1(0x66); + emit_opsingle(mnemonic, arg1, arg2, data); +} + +static void compute_rexmodrmsib(u8* rex_r, u8* modrm_r, u8* sib_r, arg_t* arg1, arg_t* arg2) +{ + u8 rex = 0; + u8 modrm = 0; + u8 sib = 0; + + if((arg1->type == T_REGISTER && arg2->type == T_REGISTER) + && ((arg1->v.reg & R_MSZ) != (arg2->v.reg & R_MSZ)) + && !((arg1->v.reg & R_XMM) || (arg2->v.reg & R_XMM))) + crap("both registers must be of same width"); + + if((arg1->type == T_REGISTER && arg1->v.reg & R_64) + || (arg2->type == T_REGISTER && arg2->v.reg & R_64)) + { + rex |= REX_W; + } + + if(arg1->type == T_REGISTER) + { + if((arg1->v.reg & R_MGP) > 0x07) + rex |= REX_R; + + modrm |= (arg1->v.reg & 0x07) << 3; + } + + if(arg2->type == T_REGISTER) + { + if((arg2->v.reg & R_MGP) > 0x07) + rex |= REX_B; + + modrm |= (arg2->v.reg & 0x07); + } + + if(arg2->type == T_MEMORY) + { + if((arg2->v.mem.basetype == T_REGISTER && !(arg2->v.mem.base.reg & R_64)) + || (arg2->v.mem.indextype == T_REGISTER && !(arg2->v.mem.index.reg & R_64))) + { + crap("only 64bit base/index registers are %x %x", arg2->v.mem.base.reg, arg2->v.mem.index.reg); + } + + if(arg2->v.mem.indextype == T_REGISTER) + { + modrm |= MODRM_RM_SIB; + if(!arg2->v.mem.disp) + { + modrm |= MODRM_MOD_00; + } + else if(iss8(arg2->v.mem.disp)) + { + modrm |= MODRM_MOD_01; + } + else if(isu32(arg2->v.mem.disp)) + { + modrm |= MODRM_MOD_10; + } + else + { + crap("invalid displacement"); + } + + if((arg2->v.mem.index.reg & R_MGP) > 0x07) + rex |= REX_X; + + if((arg2->v.mem.base.reg & R_MGP) > 0x07) + rex |= REX_B; + + if(arg2->v.mem.basetype != T_REGISTER) + crap("base must be register"); + switch(arg2->v.mem.scale) + { + case 1: break; + case 2: sib |= 1 << 6; break; + case 4: sib |= 2 << 6; break; + case 8: sib |= 3 << 6; break; + } + sib |= (arg2->v.mem.index.reg & 0x07) << 3; + sib |= (arg2->v.mem.base.reg & 0x07); + } + else if(arg2->v.mem.indextype == T_NONE) + { + if(!arg2->v.mem.disp) + { + modrm |= MODRM_MOD_00; + } + else if(iss8(arg2->v.mem.disp)) + { + modrm |= MODRM_MOD_01; + } + else if(isu32(arg2->v.mem.disp)) + { + modrm |= MODRM_MOD_10; + } + else + { + crap("invalid displacement"); + } + + if(arg2->v.mem.basetype != T_REGISTER) + crap("todo: base != register"); + + if((arg2->v.mem.base.reg & R_MGP) > 0x07) + rex |= REX_B; + + modrm |= arg2->v.mem.base.reg & 0x07; + } + else + { + crap("invalid indextype"); + } + } + else + { + modrm |= MODRM_MOD_11; + } + + if(rex) + rex |= 0x40; // XXX + + *rex_r = rex; + *modrm_r = modrm; + *sib_r = sib; +} + +static void maybe_emit_displacement(arg_t* arg) +{ + if(arg->type != T_MEMORY) + return; + + if(arg->v.mem.disp) + { + if(iss8(arg->v.mem.disp)) + { + emit1((u8)arg->v.mem.disp); + } + else if(isu32(arg->v.mem.disp)) + { + emit4(arg->v.mem.disp); + } + else + { + crap("invalid displacement"); + } + } +} + +/* one byte operator with register added to operator */ +static void emit_opreg(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 op = (u8)((unsigned long) data); + + if(arg1.type != T_REGISTER || arg2.type != T_NONE) + CRAP_INVALID_ARGS; + + if((arg1.v.reg & R_MGP) > 0x07) + emit1(0x40 | REX_B); + + op |= (arg1.v.reg & 0x07); + + emit1(op); +} + +/* operator which operates on reg/mem */ +static void emit_op_rm(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 rex, modrm, sib; + opparam_t* params = data; + + if((arg1.type != T_REGISTER && arg1.type != T_MEMORY) || arg2.type != T_NONE) + CRAP_INVALID_ARGS; + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); + + modrm |= params->subcode << 3; + + if(arg1.v.reg & R_16) + emit1(0x66); + + if(rex) emit1(rex); + if(arg1.v.reg & R_8) + emit1(params->rcode8); // op reg8/mem8, + else + emit1(params->rcode); // op reg/mem, + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg1); +} + +/* operator which operates on reg/mem with cl */ +static void emit_op_rm_cl(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 rex, modrm, sib; + opparam_t* params = data; + + if(arg2.type != T_REGISTER || arg1.type != T_REGISTER) + CRAP_INVALID_ARGS; + + if((arg1.v.reg & R_MGP) != R_ECX && !(arg1.v.reg & R_8)) + crap("only cl register is valid"); + + arg1.type = T_NONE; // don't complain, we know it's cl anyways + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + modrm |= params->subcode << 3; + + if(arg2.v.reg & R_16) + emit1(0x66); + + if(rex) emit1(rex); + if(arg2.v.reg & R_8) + emit1(params->rcode8); // op reg8/mem8, + else + emit1(params->rcode); // op reg/mem, + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg2); +} + +static void emit_mov(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 rex = 0; + u8 modrm = 0; + u8 sib = 0; + + if(arg1.type == T_IMMEDIATE && arg2.type == T_REGISTER) + { + u8 op = 0xb8; + + if(arg2.v.reg & R_8) + { + if(!isu8(arg1.v.imm)) + crap("value too large for 8bit register"); + + op = 0xb0; + } + else if(arg2.v.reg & R_16) + { + if(!isu16(arg1.v.imm)) + crap("value too large for 16bit register"); + emit1(0x66); + } + else if(!arg2.v.reg & R_64) + { + if(!isu32(arg1.v.imm)) + crap("value too large for 32bit register"); + } + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + if(rex) emit1(rex); + + op |= (arg2.v.reg & 0x07); + + emit1(op); + + if(arg2.v.reg & R_8) emit1(arg1.v.imm); + else if(arg2.v.reg & R_16) emit2(arg1.v.imm); + else if(arg2.v.reg & R_64) emit8(arg1.v.imm); + else emit4(arg1.v.imm); + } + else if(arg1.type == T_IMMEDIATE && arg2.type == T_MEMORY) + { + if(!iss32(arg1.v.imm)) + { + crap("only 32bit immediates supported"); + } + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + if(rex) emit1(rex); + emit1(0xc7); // mov reg/mem, imm + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + emit4(arg1.v.imm); + } + else if(arg1.type == T_REGISTER && arg2.type == T_REGISTER) // XXX: same as next + { + if(arg1.type != T_REGISTER || arg2.type != T_REGISTER) + crap("both args must be registers"); + + if((arg1.v.reg & R_MSZ) != (arg2.v.reg & R_MSZ)) + crap("both registers must be same width"); + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + if(rex) emit1(rex); + emit1(0x89); // mov reg reg/mem, + emit1(modrm); + } + else if(arg1.type == T_REGISTER && arg2.type == T_MEMORY) + { + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + if(arg1.v.reg & R_16) + emit1(0x66); + + if(rex) emit1(rex); + if(arg1.v.reg & R_8) + emit1(0x88); // mov reg reg/mem, + else + emit1(0x89); // mov reg reg/mem, + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg2); + } + else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER) + { + compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); + + if(arg2.v.reg & R_16) + emit1(0x66); + + if(rex) emit1(rex); + if(arg2.v.reg & R_8) + emit1(0x8a); // mov reg/mem, reg + else + emit1(0x8b); // mov reg/mem, reg + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg1); + } + else + CRAP_INVALID_ARGS; +} + +static void emit_subaddand(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 rex = 0; + u8 modrm = 0; + u8 sib = 0; + + opparam_t* params = data; + + if(arg1.type == T_IMMEDIATE && arg2.type == T_REGISTER) + { + if(!iss32(arg1.v.imm)) + { + crap("only 8 and 32 bit immediates supported"); + } + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + modrm |= params->subcode << 3; + + if(rex) emit1(rex); +#if 0 + if(isu8(arg1.v.imm)) + { + emit1(0x83); // sub reg/mem, imm8 + emit1(modrm); + emit1(arg1.v.imm&0xFF); + } + else +#endif + { + emit1(0x81); // sub reg/mem, imm32 + emit1(modrm); + emit4(arg1.v.imm); + } + } + else if(arg1.type == T_REGISTER && (arg2.type == T_MEMORY || arg2.type == T_REGISTER)) + { + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + if(rex) emit1(rex); + emit1(params->rmcode); // sub reg/mem, reg + emit1(modrm); + if(arg2.type == T_MEMORY && (modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg2); + } + else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER && params->mrcode) + { + compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); + + if(rex) emit1(rex); + emit1(params->mrcode); // sub reg, reg/mem + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg1); + } + else + CRAP_INVALID_ARGS; +} + +static void emit_condjump(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + unsigned off; + int disp; + unsigned char opcode = (unsigned char)(((unsigned long)data)&0xFF); + + if(arg1.type != T_LABEL || arg2.type != T_NONE) + crap("%s: argument must be label", mnemonic); + + emit1(opcode); + + off = lookup_label(arg1.v.label); + disp = off-(compiledOfs+1); + if(assembler_pass && abs(disp) > 127) + crap("cannot jump that far (%x -> %x = %x)", compiledOfs, off, disp); + + emit1(disp); +} + +static void emit_jmp(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + if((arg1.type != T_LABEL && arg1.type != T_REGISTER && arg1.type != T_MEMORY) || arg2.type != T_NONE) + CRAP_INVALID_ARGS; + + if(arg1.type == T_LABEL) + { + unsigned off; + int disp; + + off = lookup_label(arg1.v.label); + disp = off-(compiledOfs+5); + emit1(0xe9); + emit4(disp); + } + else + { + u8 rex, modrm, sib; + + if(arg1.type == T_REGISTER) + { + if(!arg1.absolute) + crap("jmp must be absolute"); + + if((arg1.v.reg & R_64) != R_64) + crap("register must be 64bit"); + + arg1.v.reg ^= R_64; // no rex required for call + } + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); + + modrm |= 0x4 << 3; + + if(rex) emit1(rex); + emit1(0xff); + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + maybe_emit_displacement(&arg1); + } +} + +static void emit_call(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 rex, modrm, sib; + + if(arg1.type != T_REGISTER || arg2.type != T_NONE) + CRAP_INVALID_ARGS; + + if(!arg1.absolute) + crap("call must be absolute"); + + if((arg1.v.reg & R_64) != R_64) + crap("register must be 64bit"); + + arg1.v.reg ^= R_64; // no rex required for call + + compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); + + modrm |= 0x2 << 3; + + if(rex) emit1(rex); + emit1(0xff); + emit1(modrm); +} + + +static void emit_twobyte(const char* mnemonic, arg_t arg1, arg_t arg2, void* data) +{ + u8 rex, modrm, sib; + + opparam_t* params = data; + + if(arg1.type == T_REGISTER && (arg2.type == T_MEMORY || arg2.type == T_REGISTER)) + { + compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2); + + if(params->xmmprefix) emit1(params->xmmprefix); + if(rex) emit1(rex); + emit1(0x0f); + emit1(params->rmcode); // sub reg/mem, reg + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg2); + } + else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER && params->mrcode) + { + compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); + + if(params->xmmprefix) emit1(params->xmmprefix); + if(rex) emit1(rex); + emit1(0x0f); + emit1(params->mrcode); // sub reg, reg/mem + emit1(modrm); + if((modrm & 0x07) == MODRM_RM_SIB) + emit1(sib); + + maybe_emit_displacement(&arg1); + } + else + CRAP_INVALID_ARGS; +} + +static opparam_t params_add = { subcode: 0, rmcode: 0x01, }; +static opparam_t params_or = { subcode: 1, rmcode: 0x09, }; +static opparam_t params_and = { subcode: 4, rmcode: 0x21, }; +static opparam_t params_sub = { subcode: 5, rmcode: 0x29, }; +static opparam_t params_xor = { subcode: 6, rmcode: 0x31, }; +static opparam_t params_cmp = { subcode: 6, rmcode: 0x39, mrcode: 0x3b, }; +static opparam_t params_dec = { subcode: 1, rcode: 0xff, rcode8: 0xfe, }; +static opparam_t params_sar = { subcode: 7, rcode: 0xd3, rcode8: 0xd2, }; +static opparam_t params_shl = { subcode: 4, rcode: 0xd3, rcode8: 0xd2, }; +static opparam_t params_shr = { subcode: 5, rcode: 0xd3, rcode8: 0xd2, }; +static opparam_t params_idiv = { subcode: 7, rcode: 0xf7, rcode8: 0xf6, }; +static opparam_t params_div = { subcode: 6, rcode: 0xf7, rcode8: 0xf6, }; +static opparam_t params_imul = { subcode: 5, rcode: 0xf7, rcode8: 0xf6, }; +static opparam_t params_mul = { subcode: 4, rcode: 0xf7, rcode8: 0xf6, }; +static opparam_t params_neg = { subcode: 3, rcode: 0xf7, rcode8: 0xf6, }; +static opparam_t params_not = { subcode: 2, rcode: 0xf7, rcode8: 0xf6, }; + +static opparam_t params_cvtsi2ss = { xmmprefix: 0xf3, rmcode: 0x2a }; +static opparam_t params_cvttss2si = { xmmprefix: 0xf3, rmcode: 0x2c }; +static opparam_t params_addss = { xmmprefix: 0xf3, mrcode: 0x58 }; +static opparam_t params_divss = { xmmprefix: 0xf3, mrcode: 0x5e }; +static opparam_t params_movss = { xmmprefix: 0xf3, mrcode: 0x10, rmcode: 0x11 }; +static opparam_t params_mulss = { xmmprefix: 0xf3, mrcode: 0x59 }; +static opparam_t params_subss = { xmmprefix: 0xf3, mrcode: 0x5c }; +static opparam_t params_ucomiss = { mrcode: 0x2e }; + +static int ops_sorted = 0; +static op_t ops[] = { + { "addl", emit_subaddand, ¶ms_add }, + { "addq", emit_subaddand, ¶ms_add }, + { "addss", emit_twobyte, ¶ms_addss }, + { "andl", emit_subaddand, ¶ms_and }, + { "andq", emit_subaddand, ¶ms_and }, + { "callq", emit_call, NULL }, + { "cbw", emit_opsingle16, (void*)0x98 }, + { "cdq", emit_opsingle, (void*)0x99 }, + { "cmpl", emit_subaddand, ¶ms_cmp }, + { "cmpq", emit_subaddand, ¶ms_cmp }, + { "cvtsi2ss", emit_twobyte, ¶ms_cvtsi2ss }, + { "cvttss2si", emit_twobyte, ¶ms_cvttss2si }, + { "cwde", emit_opsingle, (void*)0x98 }, + { "decl", emit_op_rm, ¶ms_dec }, + { "decq", emit_op_rm, ¶ms_dec }, + { "divl", emit_op_rm, ¶ms_div }, + { "divq", emit_op_rm, ¶ms_div }, + { "divss", emit_twobyte, ¶ms_divss }, + { "idivl", emit_op_rm, ¶ms_idiv }, + { "imull", emit_op_rm, ¶ms_imul }, + { "int3", emit_opsingle, (void*)0xcc }, + { "ja", emit_condjump, (void*)0x77 }, + { "jbe", emit_condjump, (void*)0x76 }, + { "jb", emit_condjump, (void*)0x72 }, + { "je", emit_condjump, (void*)0x74 }, + { "jl", emit_condjump, (void*)0x7c }, + { "jmp", emit_jmp, NULL }, + { "jmpq", emit_jmp, NULL }, + { "jnae", emit_condjump, (void*)0x72 }, + { "jna", emit_condjump, (void*)0x76 }, + { "jnbe", emit_condjump, (void*)0x77 }, + { "jnb", emit_condjump, (void*)0x73 }, + { "jnc", emit_condjump, (void*)0x73 }, + { "jne", emit_condjump, (void*)0x75 }, + { "jnge", emit_condjump, (void*)0x7c }, + { "jng", emit_condjump, (void*)0x7e }, + { "jnle", emit_condjump, (void*)0x7f }, + { "jnl", emit_condjump, (void*)0x7d }, + { "jnz", emit_condjump, (void*)0x75 }, + { "jp", emit_condjump, (void*)0x7a }, + { "jz", emit_condjump, (void*)0x74 }, + { "movb", emit_mov, NULL }, + { "movl", emit_mov, NULL }, + { "movq", emit_mov, NULL }, + { "movss", emit_twobyte, ¶ms_movss }, + { "movw", emit_mov, NULL }, + { "mull", emit_op_rm, ¶ms_mul }, + { "mulss", emit_twobyte, ¶ms_mulss }, + { "negl", emit_op_rm, ¶ms_neg }, + { "negq", emit_op_rm, ¶ms_neg }, + { "nop", emit_opsingle, (void*)0x90 }, + { "notl", emit_op_rm, ¶ms_not }, + { "notq", emit_op_rm, ¶ms_not }, + { "or", emit_subaddand, ¶ms_or }, + { "orl", emit_subaddand, ¶ms_or }, + { "pop", emit_opreg, (void*)0x58 }, + { "push", emit_opreg, (void*)0x50 }, + { "ret", emit_opsingle, (void*)0xc3 }, + { "sarl", emit_op_rm_cl, ¶ms_sar }, + { "shl", emit_op_rm_cl, ¶ms_shl }, + { "shrl", emit_op_rm_cl, ¶ms_shr }, + { "subl", emit_subaddand, ¶ms_sub }, + { "subq", emit_subaddand, ¶ms_sub }, + { "subss", emit_twobyte, ¶ms_subss }, + { "ucomiss", emit_twobyte, ¶ms_ucomiss }, + { "xorl", emit_subaddand, ¶ms_xor }, + { "xorq", emit_subaddand, ¶ms_xor }, + { NULL, NULL, NULL } +}; + +static int opsort(const void* A, const void* B) +{ + const op_t* a = A; + const op_t* b = B; + return strcmp(a->mnemonic, b->mnemonic); +} + +static op_t* getop(const char* n) +{ +#if 0 + op_t* o = ops; + while(o->mnemonic) + { + if(!strcmp(o->mnemonic, n)) + return o; + ++o; + } + +#else + unsigned m, t, b; + int r; + t = sizeof(ops)/sizeof(ops[0])-1; + b = 0; + + while(b <= t) + { + m = ((t-b)>>1) + b; + if((r = strcmp(ops[m].mnemonic, n)) == 0) + { + return &ops[m]; + } + else if(r < 0) + { + b = m + 1; + } + else + { + t = m - 1; + } + } +#endif + + return NULL; +} + +static reg_t parsereg(const char* str) +{ + const char* s = str; + if(*s == 'a' && s[1] == 'l' && !s[2]) + { + return R_AL; + } + else if(*s == 'a' && s[1] == 'x' && !s[2]) + { + return R_AX; + } + if(*s == 'c' && s[1] == 'l' && !s[2]) + { + return R_CL; + } + if(*s == 'x') + { + if(!strcmp(s, "xmm0")) + return R_XMM0; + } + else if(*s == 'r' && s[1]) + { + ++s; + if(s[1] == 'x') + { + switch(*s++) + { + case 'a': return R_RAX; + case 'b': return R_RBX; + case 'c': return R_RCX; + case 'd': return R_RDX; + } + } + else if(s[1] == 'i') + { + switch(*s++) + { + case 's': return R_RSI; + case 'd': return R_RDI; + } + } + else if(s[0] == 's' && s[1] == 'p' && !s[2]) + { + return R_RSP; + } + else if(*s == '8' && !s[1]) + return R_R8; + else if(*s == '9' && !s[1]) + return R_R9; + else if(*s == '1' && s[1] == '0') + return R_R10; + else if(*s == '1' && s[1] == '5') + return R_R15; + } + else if(*s == 'e' && s[1]) + { + ++s; + if(s[1] == 'x') + { + switch(*s++) + { + case 'a': return R_EAX; + case 'b': return R_EBX; + case 'c': return R_ECX; + case 'd': return R_EDX; + } + } + else if(s[1] == 'i') + { + switch(*s++) + { + case 's': return R_ESI; + case 'd': return R_EDI; + } + } + } + + crap("invalid register %s", str); + + return 0; +} + +typedef enum { + TOK_LABEL = 0x80, + TOK_INT = 0x81, + TOK_END = 0x82, + TOK_INVALID = 0x83, +} token_t; + +static unsigned char nexttok(const char** str, char* label, u64* val) +{ + const char* s = *str; + + if(label) *label = 0; + if(val) *val = 0; + + while(*s && *s == ' ') ++s; + + if(!*s) + { + return TOK_END; + } + else if(*s == '$' || *s == '*' || *s == '%' || *s == '-' || *s == ')' || *s == '(' || *s == ',') + { + *str = s+1; + return *s; + } + else if(*s >= 'a' && *s <= 'z') + { + size_t a = strspn(s+1, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"); + if(a+1 >= LABELLEN) + crap("label %s too long", s); + if(label) + { + strncpy(label, s, a+1); + label[a+1] = 0; + } + *str = s+a+1; + return TOK_LABEL; + } + else if(*s >= '0' && *s <= '9') + { + char* endptr = NULL; + u64 v = strtol(s, &endptr, 0); + if(endptr && (endptr-s == 0)) + crap("invalid integer %s", s); + if(val) *val = v; + *str = endptr; + return TOK_INT; + } + crap("can't parse '%s'", *str); + return TOK_INVALID; +} + +static arg_t parsearg(const char** str) +{ + arg_t arg; + const char* s = *str; + char label[20]; + u64 val; + int negative = 1; + unsigned ttype; + + arg.type = T_NONE; + arg.absolute = 0; + + while(*s && *s == ' ') ++s; + + switch(nexttok(&s, label, &val)) + { + case '$' : + ttype = nexttok(&s, NULL, &val); + if(ttype == '-') + { + negative = -1; + ttype = nexttok(&s, NULL, &val); + } + if(ttype != TOK_INT) + crap("expected integer"); + arg.type = T_IMMEDIATE; + arg.v.imm = negative * val; + break; + case '*' : + if((ttype = nexttok(&s, NULL, NULL)) != '%') + { + if(ttype == '(') + goto tok_memory; + crap("expected '%%'"); + } + arg.absolute = 1; + /* fall through */ + case '%' : + if(nexttok(&s, label, &val) != TOK_LABEL) + crap("expected label"); + arg.type = T_REGISTER; + arg.v.reg = parsereg(label); + break; + case TOK_LABEL: + arg.type = T_LABEL; + strncpy(arg.v.label, label, LABELLEN); + break; + case '-': + negative = -1; + if(nexttok(&s, NULL, &val) != TOK_INT) + crap("expected integer"); + /* fall through */ + case TOK_INT: + if(nexttok(&s, label, NULL) != '(') + crap("expected '('"); // mov to/from fixed address not supported + /* fall through */ + case '(': +tok_memory: + arg.type = T_MEMORY; + arg.v.mem.indextype = T_NONE; + arg.v.mem.disp = negative * val; + ttype = nexttok(&s, label, &val); + if(ttype == '%' && nexttok(&s, label, &val) != TOK_LABEL) + { + crap("expected register"); + } + if (ttype == '%') + { + arg.v.mem.basetype = T_REGISTER; + arg.v.mem.base.reg = parsereg(label); + } + else if (ttype == TOK_INT) + { + arg.v.mem.basetype = T_IMMEDIATE; + arg.v.mem.base.imm = val; + } + if((ttype = nexttok(&s, NULL, NULL)) == ',') + { + ttype = nexttok(&s, label, &val); + if(ttype == '%' && nexttok(&s, label, &val) != TOK_LABEL) + { + crap("expected register"); + } + if (ttype == '%') + { + arg.v.mem.indextype = T_REGISTER; + arg.v.mem.index.reg = parsereg(label); + } + else if (ttype == TOK_INT) + { + crap("index must be register"); + arg.v.mem.indextype = T_IMMEDIATE; + arg.v.mem.index.imm = val; + } + if(nexttok(&s, NULL, NULL) != ',') + crap("expected ','"); + if(nexttok(&s, NULL, &val) != TOK_INT) + crap("expected integer"); + if(val != 1 && val != 2 && val != 4 && val != 8) + crap("scale must 1, 2, 4 or 8"); + arg.v.mem.scale = val; + + ttype = nexttok(&s, NULL, NULL); + } + if(ttype != ')') + { + crap("expected ')' or ','"); + } + break; + default: + crap("invalid token %hhu in %s", *(unsigned char*)s, *str); + break; + } + + *str = s; + + return arg; +} + +/* ************************* */ + +void assembler_init(int pass) +{ + compiledOfs = 0; + assembler_pass = pass; + if(!pass) + { + labelhash_free(); + cur_line = NULL; + } + if(!ops_sorted) + { + ops_sorted = 1; + qsort(ops, sizeof(ops)/sizeof(ops[0])-1, sizeof(ops[0]), opsort); + } +} + +size_t assembler_get_code_size(void) +{ + return compiledOfs; +} + +void assembler_set_output(char* buf) +{ + out = buf; +} + +void assemble_line(const char* input, size_t len) +{ + char line[4096]; + char* s; + op_t* o; + char* opn; + arg_t arg1, arg2; + + arg1.type = T_NONE; + arg2.type = T_NONE; + opn = NULL; + o = NULL; + + if(len < 1) + return; + + if(len >= sizeof(line)) + crap("line too long"); + + memcpy(line, input, sizeof(line)); + cur_line = input; + + if(line[len-1] == '\n') line[--len] = 0; + if(line[len-1] == ':') + { + line[--len] = 0; + if(assembler_pass) + debug("%s: 0x%x\n", line, compiledOfs); + else + hash_add_label(line, compiledOfs); + } + else + { + opn = line; + s = strchr(line, ' '); + if(s) + { + *s++ = 0; + arg1 = parsearg((const char**)&s); + if(*s) + { + if(*s != ',') + crap("expected ',', got '%c'", *s); + ++s; + arg2 = parsearg((const char**)&s); + } + } + + if(!opn) + { + crap("no operator in %s", line); + } + + o = getop(opn); + if(!o) + { + crap("cannot handle op %s", opn); + } + o->func(opn, arg1, arg2, o->data); + if(assembler_pass) + debug(" - %s%s", cur_line, cur_line[strlen(cur_line)-1]=='\n'?"":"\n"); + } +} + +#ifdef SA_STANDALONE +int main(int argc, char* argv[]) +{ + char line[4096]; + size_t len; + int pass; + FILE* file = NULL; + + if(argc < 2) + { + crap("specify file"); + } + + file = fopen(argv[1], "r"); + if(!file) + { + crap("can't open file"); + } + + if(argc > 2) + { + fout = fopen(argv[2], "w"); + if(!fout) + { + crap("can't open %s for writing", argv[2]); + } + } + + for(pass = 0; pass < 2; ++pass) + { + if(fseek(file, 0, SEEK_SET)) + crap("can't rewind file"); + + if(pass) + { + char* b = malloc(assembler_get_code_size()); + if(!b) + crap("cannot allocate memory"); + assembler_set_output(b); + } + + assembler_init(pass); + + while(fgets(line, sizeof(line), file)) + { + len = strlen(line); + if(!len) continue; + + assemble_line(line, len); + } + } + + assembler_init(0); + + fclose(file); + + return 0; +} +#endif |