summaryrefslogtreecommitdiff
path: root/src/qcommon
diff options
context:
space:
mode:
authorTim Angus <tim@ngus.net>2007-08-24 00:32:53 +0000
committerTim Angus <tim@ngus.net>2007-08-24 00:32:53 +0000
commitd6cbf3366e63e4e4333e49377ee994d26b266a6c (patch)
tree18c6488dbd654cb747534fc7a1dcd240be492bdb /src/qcommon
parent9bb5d25cc24fd678660af34b9b19e21a37f09bd3 (diff)
* Merged ioq3-r1133
+ PNG image loader + Non-gas dependent x86_64 VM + Collision optimisations + Slew of other bug fixes
Diffstat (limited to 'src/qcommon')
-rw-r--r--src/qcommon/cm_patch.c7
-rw-r--r--src/qcommon/cm_test.c4
-rw-r--r--src/qcommon/cm_trace.c5
-rw-r--r--src/qcommon/common.c2
-rw-r--r--src/qcommon/files.c28
-rw-r--r--src/qcommon/msg.c4
-rw-r--r--src/qcommon/puff.c758
-rw-r--r--src/qcommon/puff.h43
-rw-r--r--src/qcommon/q_math.c47
-rw-r--r--src/qcommon/q_shared.h9
-rw-r--r--src/qcommon/vm_x86.c2
-rw-r--r--src/qcommon/vm_x86_64.c152
-rw-r--r--src/qcommon/vm_x86_64_assembler.c1419
13 files changed, 2431 insertions, 49 deletions
diff --git a/src/qcommon/cm_patch.c b/src/qcommon/cm_patch.c
index f262db9c..38b7d5cc 100644
--- a/src/qcommon/cm_patch.c
+++ b/src/qcommon/cm_patch.c
@@ -1154,7 +1154,7 @@ struct patchCollide_s *CM_GeneratePatchCollide( int width, int height, vec3_t *p
if ( width <= 2 || height <= 2 || !points ) {
Com_Error( ERR_DROP, "CM_GeneratePatchFacets: bad parameters: (%i, %i, %p)",
- width, height, points );
+ width, height, (void *)points );
}
if ( !(width & 1) || !(height & 1) ) {
@@ -1387,6 +1387,11 @@ void CM_TraceThroughPatchCollide( traceWork_t *tw, const struct patchCollide_s *
static cvar_t *cv;
#endif //BSPC
+ if ( !BoundsIntersect( tw->bounds[0], tw->bounds[1],
+ pc->bounds[0], pc->bounds[1] ) ) {
+ return;
+ }
+
if (tw->isPoint) {
CM_TracePointThroughPatchCollide( tw, pc );
return;
diff --git a/src/qcommon/cm_test.c b/src/qcommon/cm_test.c
index 9e950603..485facc2 100644
--- a/src/qcommon/cm_test.c
+++ b/src/qcommon/cm_test.c
@@ -251,6 +251,10 @@ int CM_PointContents( const vec3_t p, clipHandle_t model ) {
brushnum = cm.leafbrushes[leaf->firstLeafBrush+k];
b = &cm.brushes[brushnum];
+ if ( !BoundsIntersectPoint( b->bounds[0], b->bounds[1], p ) ) {
+ continue;
+ }
+
// see if the point is in the brush
for ( i = 0 ; i < b->numsides ; i++ ) {
d = DotProduct( p, b->sides[i].plane->normal );
diff --git a/src/qcommon/cm_trace.c b/src/qcommon/cm_trace.c
index ee9540e3..c40e1013 100644
--- a/src/qcommon/cm_trace.c
+++ b/src/qcommon/cm_trace.c
@@ -847,6 +847,11 @@ void CM_TraceThroughLeaf( traceWork_t *tw, cLeaf_t *leaf ) {
b->collided = qfalse;
+ if ( !BoundsIntersect( tw->bounds[0], tw->bounds[1],
+ b->bounds[0], b->bounds[1] ) ) {
+ continue;
+ }
+
CM_TraceThroughBrush( tw, b );
if ( !tw->trace.fraction ) {
tw->trace.lateralFraction = 0.0f;
diff --git a/src/qcommon/common.c b/src/qcommon/common.c
index cdfe3386..be4eafd1 100644
--- a/src/qcommon/common.c
+++ b/src/qcommon/common.c
@@ -1257,7 +1257,7 @@ void Com_Meminfo_f( void ) {
for (block = mainzone->blocklist.next ; ; block = block->next) {
if ( Cmd_Argc() != 1 ) {
Com_Printf ("block:%p size:%7i tag:%3i\n",
- block, block->size, block->tag);
+ (void *)block, block->size, block->tag);
}
if ( block->tag ) {
zoneBytes += block->size;
diff --git a/src/qcommon/files.c b/src/qcommon/files.c
index cd369908..c1f6fb2d 100644
--- a/src/qcommon/files.c
+++ b/src/qcommon/files.c
@@ -975,22 +975,6 @@ qboolean FS_FilenameCompare( const char *s1, const char *s2 ) {
/*
===========
-FS_ShiftedStrStr
-===========
-*/
-char *FS_ShiftedStrStr(const char *string, const char *substring, int shift) {
- char buf[MAX_STRING_TOKENS];
- int i;
-
- for (i = 0; substring[i]; i++) {
- buf[i] = substring[i] + shift;
- }
- buf[i] = '\0';
- return strstr(string, buf);
-}
-
-/*
-===========
FS_FOpenFileRead
Finds the file in the search path.
@@ -1117,19 +1101,13 @@ int FS_FOpenFileRead( const char *filename, fileHandle_t *file, qboolean uniqueF
}
}
- // game.qvm - 13
- // ZT`X!di`
- if (!(pak->referenced & FS_QAGAME_REF) && FS_ShiftedStrStr(filename, "ZT`X!di`", 13)) {
+ if (!(pak->referenced & FS_QAGAME_REF) && strstr(filename, "game.qvm")) {
pak->referenced |= FS_QAGAME_REF;
}
- // cgame.qvm - 7
- // \`Zf^'jof
- if (!(pak->referenced & FS_CGAME_REF) && FS_ShiftedStrStr(filename , "\\`Zf^'jof", 7)) {
+ if (!(pak->referenced & FS_CGAME_REF) && strstr(filename, "cgame.qvm")) {
pak->referenced |= FS_CGAME_REF;
}
- // ui.qvm - 5
- // pd)lqh
- if (!(pak->referenced & FS_UI_REF) && FS_ShiftedStrStr(filename , "pd)lqh", 5)) {
+ if (!(pak->referenced & FS_UI_REF) && strstr(filename, "ui.qvm")) {
pak->referenced |= FS_UI_REF;
}
diff --git a/src/qcommon/msg.c b/src/qcommon/msg.c
index 46019809..d46c2a3b 100644
--- a/src/qcommon/msg.c
+++ b/src/qcommon/msg.c
@@ -791,7 +791,7 @@ typedef struct {
} netField_t;
// using the stringizing operator to save typing...
-#define NETF(x) #x,(int)&((entityState_t*)0)->x
+#define NETF(x) #x,(size_t)&((entityState_t*)0)->x
netField_t entityStateFields[] =
{
@@ -1106,7 +1106,7 @@ plyer_state_t communication
*/
// using the stringizing operator to save typing...
-#define PSF(x) #x,(int)&((playerState_t*)0)->x
+#define PSF(x) #x,(size_t)&((playerState_t*)0)->x
netField_t playerStateFields[] =
{
diff --git a/src/qcommon/puff.c b/src/qcommon/puff.c
new file mode 100644
index 00000000..721854d8
--- /dev/null
+++ b/src/qcommon/puff.c
@@ -0,0 +1,758 @@
+/*
+ * This is a modified version of Mark Adler's work,
+ * see below for the original copyright.
+ * 2006 - Joerg Dietrich <dietrich_joerg@gmx.de>
+ */
+
+/*
+ * puff.c
+ * Copyright (C) 2002-2004 Mark Adler
+ * For conditions of distribution and use, see copyright notice in puff.h
+ * version 1.8, 9 Jan 2004
+ *
+ * puff.c is a simple inflate written to be an unambiguous way to specify the
+ * deflate format. It is not written for speed but rather simplicity. As a
+ * side benefit, this code might actually be useful when small code is more
+ * important than speed, such as bootstrap applications. For typical deflate
+ * data, zlib's inflate() is about four times as fast as puff(). zlib's
+ * inflate compiles to around 20K on my machine, whereas puff.c compiles to
+ * around 4K on my machine (a PowerPC using GNU cc). If the faster decode()
+ * function here is used, then puff() is only twice as slow as zlib's
+ * inflate().
+ *
+ * All dynamically allocated memory comes from the stack. The stack required
+ * is less than 2K bytes. This code is compatible with 16-bit int's and
+ * assumes that long's are at least 32 bits. puff.c uses the short data type,
+ * assumed to be 16 bits, for arrays in order to conserve memory. The code
+ * works whether integers are stored big endian or little endian.
+ *
+ * In the comments below are "Format notes" that describe the inflate process
+ * and document some of the less obvious aspects of the format. This source
+ * code is meant to supplement RFC 1951, which formally describes the deflate
+ * format:
+ *
+ * http://www.zlib.org/rfc-deflate.html
+ */
+
+/*
+ * Change history:
+ *
+ * 1.0 10 Feb 2002 - First version
+ * 1.1 17 Feb 2002 - Clarifications of some comments and notes
+ * - Update puff() dest and source pointers on negative
+ * errors to facilitate debugging deflators
+ * - Remove longest from struct huffman -- not needed
+ * - Simplify offs[] index in construct()
+ * - Add input size and checking, using longjmp() to
+ * maintain easy readability
+ * - Use short data type for large arrays
+ * - Use pointers instead of long to specify source and
+ * destination sizes to avoid arbitrary 4 GB limits
+ * 1.2 17 Mar 2002 - Add faster version of decode(), doubles speed (!),
+ * but leave simple version for readability
+ * - Make sure invalid distances detected if pointers
+ * are 16 bits
+ * - Fix fixed codes table error
+ * - Provide a scanning mode for determining size of
+ * uncompressed data
+ * 1.3 20 Mar 2002 - Go back to lengths for puff() parameters [Jean-loup]
+ * - Add a puff.h file for the interface
+ * - Add braces in puff() for else do [Jean-loup]
+ * - Use indexes instead of pointers for readability
+ * 1.4 31 Mar 2002 - Simplify construct() code set check
+ * - Fix some comments
+ * - Add FIXLCODES #define
+ * 1.5 6 Apr 2002 - Minor comment fixes
+ * 1.6 7 Aug 2002 - Minor format changes
+ * 1.7 3 Mar 2003 - Added test code for distribution
+ * - Added zlib-like license
+ * 1.8 9 Jan 2004 - Added some comments on no distance codes case
+ */
+
+#include <setjmp.h> /* for setjmp(), longjmp(), and jmp_buf */
+#include "puff.h" /* prototype for puff() */
+
+ #define local static /* marks file-scope (internal linkage) helper functions */
+
+ /*
+ * Maximums for allocations and loops. It is not useful to change these --
+ * they are fixed by the deflate format.
+ */
+ #define MAXBITS 15 /* maximum bits in a code */
+ #define MAXLCODES 286 /* maximum number of literal/length codes */
+ #define MAXDCODES 30 /* maximum number of distance codes */
+ #define MAXCODES (MAXLCODES+MAXDCODES) /* maximum number of code lengths to read */
+ #define FIXLCODES 288 /* number of fixed literal/length codes */
+
+ /* input and output state shared by the block decoders in this file */
+ struct state {
+ /* output state */
+ uint8_t *out; /* output buffer; when NULL the decoders only count output bytes */
+ uint32_t outlen; /* available space at out */
+ uint32_t outcnt; /* bytes written to out (or counted, when out is NULL) so far */
+
+ /* input state */
+ uint8_t *in; /* input buffer */
+ uint32_t inlen; /* available input at in */
+ uint32_t incnt; /* bytes read so far */
+ int32_t bitbuf; /* bit buffer: bits loaded from in but not yet consumed */
+ int32_t bitcnt; /* number of bits in bit buffer */
+
+ /* input limit error return state for bits() and decode() */
+ jmp_buf env; /* bits() and decode() longjmp(env, 1) when input runs out */
+ };
+
+/*
+ * Return need bits from the input stream. This always leaves less than
+ * eight bits in the buffer. bits() works properly for need == 0.
+ *
+ * Format notes:
+ *
+ * - Bits are stored in bytes from the least significant bit to the most
+ * significant bit. Therefore bits are dropped from the bottom of the bit
+ * buffer, using shift right, and new bytes are appended to the top of the
+ * bit buffer, using shift left.
+ */
+ local int32_t bits(struct state *s, int32_t need)
+ {
+     int32_t acc = s->bitbuf;    /* pending bits, lowest bit is next in stream */
+     int32_t result;             /* the low `need` bits of acc */
+
+     /* pull in whole bytes until at least `need` bits are buffered */
+     for (; s->bitcnt < need; s->bitcnt += 8) {
+         if (s->incnt == s->inlen)
+             longjmp(s->env, 1);                 /* out of input */
+         acc |= (int32_t)(s->in[s->incnt]) << s->bitcnt;
+         s->incnt++;
+     }
+
+     /* isolate the requested bits before they are shifted out */
+     result = (int32_t)(acc & ((1L << need) - 1));
+
+     /* keep what is left over (zero to seven bits) for the next call */
+     s->bitbuf = (int32_t)(acc >> need);
+     s->bitcnt -= need;
+
+     return result;
+ }
+
+/*
+ * Process a stored block.
+ *
+ * Format notes:
+ *
+ * - After the two-bit stored block type (00), the stored block length and
+ * stored bytes are byte-aligned for fast copying. Therefore any leftover
+ * bits in the byte that has the last bit of the type, as many as seven, are
+ * discarded. The value of the discarded bits are not defined and should not
+ * be checked against any expectation.
+ *
+ * - The second inverted copy of the stored block length does not have to be
+ * checked, but it's probably a good idea to do so anyway.
+ *
+ * - A stored block can have zero length. This is sometimes used to byte-align
+ * subsets of the compressed data for random access or partial recovery.
+ */
+ local int32_t stored(struct state *s)
+ {
+     uint32_t len;       /* LEN field: number of raw bytes in this block */
+     uint8_t *hdr;       /* the four LEN/NLEN header bytes */
+
+     /* a stored block is byte aligned: throw away any partial byte */
+     s->bitcnt = 0;
+     s->bitbuf = 0;
+
+     /* the header is LEN (16 bits, low byte first) and its complement NLEN */
+     if (s->incnt + 4 > s->inlen)
+         return 2;                               /* not enough input */
+     hdr = s->in + s->incnt;
+     len = hdr[0];
+     len |= hdr[1] << 8;
+
+     /* consume LEN and the first NLEN byte, then verify that byte */
+     s->incnt += 3;
+     if (hdr[2] != (~len & 0xff))
+         return -2;                              /* didn't match complement! */
+
+     /* consume and verify the second NLEN byte */
+     s->incnt++;
+     if (hdr[3] != ((~len >> 8) & 0xff))
+         return -2;                              /* didn't match complement! */
+
+     /* the payload must be entirely present in the input */
+     if (s->incnt + len > s->inlen)
+         return 2;                               /* not enough input */
+
+     /* no output buffer: only account for the bytes */
+     if (s->out == NULL) {
+         s->outcnt += len;
+         s->incnt += len;
+         return 0;
+     }
+
+     /* copy the payload straight through */
+     if (s->outcnt + len > s->outlen)
+         return 1;                               /* not enough output space */
+     for (; len != 0; len--) {
+         s->out[s->outcnt] = s->in[s->incnt];
+         s->outcnt++;
+         s->incnt++;
+     }
+
+     /* done with a valid stored block */
+     return 0;
+ }
+
+/*
+ * Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of
+ * each length, which for a canonical code are stepped through in order.
+ * symbol[] are the symbol values in canonical order, where the number of
+ * entries is the sum of the counts in count[]. The decoding process can be
+ * seen in the function decode() below.
+ */
+ struct huffman {
+ int16_t *count; /* count[len] is the number of symbols with code length len */
+ int16_t *symbol; /* canonically ordered symbols */
+ };
+
+/*
+ * Decode a code from the stream s using huffman table h. Return the symbol or
+ * a negative value if there is an error. If all of the lengths are zero, i.e.
+ * an empty code, or if the code is incomplete and an invalid code is received,
+ * then -9 is returned after reading MAXBITS bits.
+ *
+ * Format notes:
+ *
+ * - The codes as stored in the compressed data are bit-reversed relative to
+ * a simple integer ordering of codes of the same lengths. Hence below the
+ * bits are pulled from the compressed data one at a time and used to
+ * build the code value reversed from what is in the stream in order to
+ * permit simple integer comparisons for decoding. A table-based decoding
+ * scheme (as used in zlib) does not need to do this reversal.
+ *
+ * - The first code for the shortest length is all zeros. Subsequent codes of
+ * the same length are simply integer increments of the previous code. When
+ * moving up a length, a zero bit is appended to the code. For a complete
+ * code, the last code of the longest length will be all ones.
+ *
+ * - Incomplete codes are handled by this decoder, since they are permitted
+ * in the deflate format. See the format notes for fixed() and dynamic().
+ */
+ local int32_t decode(struct state *s, struct huffman *h)
+ {
+ int32_t len; /* current number of bits in code */
+ int32_t code; /* len bits being decoded */
+ int32_t first; /* first code of length len */
+ int32_t count; /* number of codes of length len */
+ int32_t index; /* index of first code of length len in symbol table */
+ int32_t bitbuf; /* bits from stream */
+ int32_t left; /* bits left in next or left to process */
+ int16_t *next; /* next number of codes */
+
+ bitbuf = s->bitbuf; /* start with the bits left over from earlier reads */
+ left = s->bitcnt;
+ code = first = index = 0;
+ len = 1;
+ next = h->count + 1; /* begin at count[1]; count[0] is not used here */
+ while (1) {
+ while (left--) {
+ code |= bitbuf & 1; /* append the next stream bit as the low bit of code */
+ bitbuf >>= 1;
+ count = *next++;
+ if (code < first + count) { /* if length len, return symbol */
+ s->bitbuf = bitbuf;
+ s->bitcnt = (s->bitcnt - len) & 7; /* unused bits of the last byte consumed */
+ return h->symbol[index + (code - first)];
+ }
+ index += count; /* else update for next length */
+ first += count;
+ first <<= 1;
+ code <<= 1;
+ len++;
+ }
+ left = (MAXBITS+1) - len; /* code bits still permitted, up to MAXBITS in total */
+ if (left == 0) break; /* MAXBITS bits tried without finding a symbol */
+ if (s->incnt == s->inlen) longjmp(s->env, 1); /* out of input */
+ bitbuf = s->in[s->incnt++]; /* load the next input byte */
+ if (left > 8) left = 8; /* the byte just loaded supplies at most eight bits */
+ }
+ return -9; /* ran out of codes */
+ }
+
+/*
+ * Given the list of code lengths length[0..n-1] representing a canonical
+ * Huffman code for n symbols, construct the tables required to decode those
+ * codes. Those tables are the number of codes of each length, and the symbols
+ * sorted by length, retaining their original order within each length. The
+ * return value is zero for a complete code set, negative for an over-
+ * subscribed code set, and positive for an incomplete code set. The tables
+ * can be used if the return value is zero or positive, but they cannot be used
+ * if the return value is negative. If the return value is zero, it is not
+ * possible for decode() using that table to return an error--any stream of
+ * enough bits will resolve to a symbol. If the return value is positive, then
+ * it is possible for decode() using that table to return an error for received
+ * codes past the end of the incomplete lengths.
+ *
+ * Not used by decode(), but used for error checking, h->count[0] is the number
+ * of the n symbols not in the code. So n - h->count[0] is the number of
+ * codes. This is useful for checking for incomplete codes that have more than
+ * one symbol, which is an error in a dynamic block.
+ *
+ * Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS
+ * This is assured by the construction of the length arrays in dynamic() and
+ * fixed() and is not verified by construct().
+ *
+ * Format notes:
+ *
+ * - Permitted and expected examples of incomplete codes are one of the fixed
+ * codes and any code with a single symbol which in deflate is coded as one
+ * bit instead of zero bits. See the format notes for fixed() and dynamic().
+ *
+ * - Within a given code length, the symbols are kept in ascending order for
+ * the code bits definition.
+ */
+ local int32_t construct(struct huffman *h, int16_t *length, int32_t n)
+ {
+     int32_t sym;                /* symbol being visited in length[] */
+     int32_t nbits;              /* code length being visited in h->count[] */
+     int32_t avail;              /* unassigned codes remaining at this length */
+     int16_t slot[MAXBITS+1];    /* next free symbol-table slot per length */
+
+     /* histogram of code lengths (lengths are assumed to be in bounds) */
+     nbits = 0;
+     while (nbits <= MAXBITS)
+         h->count[nbits++] = 0;
+     for (sym = 0; sym < n; sym++)
+         h->count[length[sym]] += 1;
+
+     /* every length zero: nothing to build, decode() will report the error */
+     if (h->count[0] == n)
+         return 0;
+
+     /*
+      * Walk the lengths, doubling the code space at each extra bit and
+      * removing the codes used up; going negative means over-subscribed.
+      */
+     avail = 1;
+     for (nbits = 1; nbits <= MAXBITS; nbits++) {
+         avail = (avail << 1) - h->count[nbits];
+         if (avail < 0)
+             return avail;
+     }
+
+     /* prefix sums give the first symbol-table slot of each length */
+     slot[1] = 0;
+     for (nbits = 2; nbits <= MAXBITS; nbits++)
+         slot[nbits] = slot[nbits - 1] + h->count[nbits - 1];
+
+     /* drop each coded symbol into its length's run, in symbol order */
+     for (sym = 0; sym < n; sym++) {
+         int16_t l = length[sym];
+
+         if (l == 0)
+             continue;           /* symbol has no code */
+         h->symbol[slot[l]] = sym;
+         slot[l]++;
+     }
+
+     /* zero means complete, positive means incomplete */
+     return avail;
+ }
+
+/*
+ * Decode literal/length and distance codes until an end-of-block code.
+ *
+ * Format notes:
+ *
+ * - Compressed data that is after the block type if fixed or after the code
+ * description if dynamic is a combination of literals and length/distance
+ * pairs terminated by an end-of-block code. Literals are simply Huffman
+ * coded bytes. A length/distance pair is a coded length followed by a
+ * coded distance to represent a string that occurs earlier in the
+ * uncompressed data that occurs again at the current location.
+ *
+ * - Literals, lengths, and the end-of-block code are combined into a single
+ * code of up to 286 symbols. They are 256 literals (0..255), 29 length
+ * symbols (257..285), and the end-of-block symbol (256).
+ *
+ * - There are 256 possible lengths (3..258), and so 29 symbols are not enough
+ * to represent all of those. Lengths 3..10 and 258 are in fact represented
+ * by just a length symbol. Lengths 11..257 are represented as a symbol and
+ * some number of extra bits that are added as an integer to the base length
+ * of the length symbol. The number of extra bits is determined by the base
+ * length symbol. These are in the static arrays below, lens[] for the base
+ * lengths and lext[] for the corresponding number of extra bits.
+ *
+ * - The reason that 258 gets its own symbol is that the longest length is used
+ * often in highly redundant files. Note that 258 can also be coded as the
+ * base value 227 plus the maximum extra value of 31. While a good deflate
+ * should never do this, it is not an error, and should be decoded properly.
+ *
+ * - If a length is decoded, including its extra bits if any, then it is
+ * followed by a distance code. There are up to 30 distance symbols. Again
+ * there are many more possible distances (1..32768), so extra bits are added
+ * to a base value represented by the symbol. The distances 1..4 get their
+ * own symbol, but the rest require extra bits. The base distances and
+ * corresponding number of extra bits are below in the static arrays dist[]
+ * and dext[].
+ *
+ * - Literal bytes are simply written to the output. A length/distance pair is
+ * an instruction to copy previously uncompressed bytes to the output. The
+ * copy is from distance bytes back in the output stream, copying for length
+ * bytes.
+ *
+ * - Distances pointing before the beginning of the output data are not
+ * permitted.
+ *
+ * - Overlapped copies, where the length is greater than the distance, are
+ * allowed and common. For example, a distance of one and a length of 258
+ * simply copies the last byte 258 times. A distance of four and a length of
+ * twelve copies the last four bytes three times. A simple forward copy
+ * ignoring whether the length is greater than the distance or not implements
+ * this correctly. You should not use memcpy() since its behavior is not
+ * defined for overlapped arrays. You should not use memmove() or bcopy()
+ * since though their behavior -is- defined for overlapping arrays, it is
+ * defined to do the wrong thing in this case.
+ */
+ local int32_t codes(struct state *s,
+                     struct huffman *lencode,
+                     struct huffman *distcode)
+ {
+     int32_t len;        /* number of bytes to copy for a match */
+     uint32_t dist;      /* how far back in the output the match starts */
+     static const int16_t lens[29] = { /* Size base for length codes 257..285 */
+         3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
+     static const int16_t lext[29] = { /* Extra bits for length codes 257..285 */
+         0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+         3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
+     static const int16_t dists[30] = { /* Offset base for distance codes 0..29 */
+         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+         8193, 12289, 16385, 24577};
+     static const int16_t dext[30] = { /* Extra bits for distance codes 0..29 */
+         0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+         7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+         12, 12, 13, 13};
+
+     /* one iteration per literal or length/distance pair */
+     for (;;) {
+         int32_t sym = decode(s, lencode);
+
+         if (sym < 0)
+             return sym;                         /* invalid symbol */
+         if (sym == 256)
+             break;                              /* end of block symbol */
+
+         if (sym < 256) {
+             /* literal: the symbol is the output byte itself */
+             if (s->out != NULL) {
+                 if (s->outcnt == s->outlen)
+                     return 1;
+                 s->out[s->outcnt] = sym;
+             }
+             s->outcnt++;
+             continue;
+         }
+
+         /* length symbol: base length plus its extra bits */
+         sym -= 257;
+         if (sym >= 29)
+             return -9;                          /* invalid fixed code */
+         len = lens[sym] + bits(s, lext[sym]);
+
+         /* the distance always follows: base distance plus extra bits */
+         sym = decode(s, distcode);
+         if (sym < 0)
+             return sym;                         /* invalid symbol */
+         dist = dists[sym] + bits(s, dext[sym]);
+         if (dist > s->outcnt)
+             return -10;                         /* distance too far back */
+
+         /* no output buffer: just account for the bytes */
+         if (s->out == NULL) {
+             s->outcnt += len;
+             continue;
+         }
+
+         /* byte-by-byte forward copy so overlapping matches replicate */
+         if (s->outcnt + len > s->outlen)
+             return 1;
+         for (; len > 0; len--) {
+             s->out[s->outcnt] = s->out[s->outcnt - dist];
+             s->outcnt++;
+         }
+     }
+
+     /* done with a valid fixed or dynamic block */
+     return 0;
+ }
+
+/*
+ * Process a fixed codes block.
+ *
+ * Format notes:
+ *
+ * - This block type can be useful for compressing small amounts of data for
+ * which the size of the code descriptions in a dynamic block exceeds the
+ * benefit of custom codes for that block. For fixed codes, no bits are
+ * spent on code descriptions. Instead the code lengths for literal/length
+ * codes and distance codes are fixed. The specific lengths for each symbol
+ * can be seen in the "for" loops below.
+ *
+ * - The literal/length code is complete, but has two symbols that are invalid
+ * and should result in an error if received. This cannot be implemented
+ * simply as an incomplete code since those two symbols are in the "middle"
+ * of the code. They are eight bits long and the longest literal/length
+ * code is nine bits. Therefore the code must be constructed with those
+ * symbols, and the invalid symbols must be detected after decoding.
+ *
+ * - The fixed distance codes also have two invalid symbols that should result
+ * in an error if received. Since all of the distance codes are the same
+ * length, this can be implemented as an incomplete code. Then the invalid
+ * codes are detected while decoding.
+ */
+ local int32_t fixed(struct state *s)
+ {
+     static int32_t built = 0;   /* nonzero once the static tables are filled */
+     static int16_t lencnt[MAXBITS+1], lensym[FIXLCODES];
+     static int16_t distcnt[MAXBITS+1], distsym[MAXDCODES];
+     static struct huffman lencode = {lencnt, lensym};
+     static struct huffman distcode = {distcnt, distsym};
+
+     /* the tables are built on the first call only (may not be thread safe) */
+     if (!built) {
+         int16_t lengths[FIXLCODES];
+         int32_t i;
+
+         /* literal/length code lengths: 8, 9, 7, 8 bits by symbol range */
+         for (i = 0; i < FIXLCODES; i++) {
+             if (i < 144)
+                 lengths[i] = 8;
+             else if (i < 256)
+                 lengths[i] = 9;
+             else if (i < 280)
+                 lengths[i] = 7;
+             else
+                 lengths[i] = 8;
+         }
+         construct(&lencode, lengths, FIXLCODES);
+
+         /* every distance code is five bits long */
+         for (i = 0; i < MAXDCODES; i++)
+             lengths[i] = 5;
+         construct(&distcode, lengths, MAXDCODES);
+
+         built = 1;
+     }
+
+     /* decode data until end-of-block code */
+     return codes(s, &lencode, &distcode);
+ }
+
+/*
+ * Process a dynamic codes block.
+ *
+ * Format notes:
+ *
+ * - A dynamic block starts with a description of the literal/length and
+ * distance codes for that block. New dynamic blocks allow the compressor to
+ * rapidly adapt to changing data with new codes optimized for that data.
+ *
+ * - The codes used by the deflate format are "canonical", which means that
+ * the actual bits of the codes are generated in an unambiguous way simply
+ * from the number of bits in each code. Therefore the code descriptions
+ * are simply a list of code lengths for each symbol.
+ *
+ * - The code lengths are stored in order for the symbols, so lengths are
+ * provided for each of the literal/length symbols, and for each of the
+ * distance symbols.
+ *
+ * - If a symbol is not used in the block, this is represented by a zero as
+ * the code length. This does not mean a zero-length code, but rather
+ * that no code should be created for this symbol. There is no way in the
+ * deflate format to represent a zero-length code.
+ *
+ * - The maximum number of bits in a code is 15, so the possible lengths for
+ * any code are 1..15.
+ *
+ * - The fact that a length of zero is not permitted for a code has an
+ * interesting consequence. Normally if only one symbol is used for a given
+ * code, then in fact that code could be represented with zero bits. However
+ * in deflate, that code has to be at least one bit. So for example, if
+ * only a single distance base symbol appears in a block, then it will be
+ * represented by a single code of length one, in particular one 0 bit. This
+ * is an incomplete code, since if a 1 bit is received, it has no meaning,
+ * and should result in an error. So incomplete distance codes of one symbol
+ * should be permitted, and the receipt of invalid codes should be handled.
+ *
+ * - It is also possible to have a single literal/length code, but that code
+ * must be the end-of-block code, since every dynamic block has one. This
+ * is not the most efficient way to create an empty block (an empty fixed
+ * block is fewer bits), but it is allowed by the format. So incomplete
+ * literal/length codes of one symbol should also be permitted.
+ *
+ * - If there are only literal codes and no lengths, then there are no distance
+ * codes. This is represented by one distance code with zero bits.
+ *
+ * - The list of up to 286 length/literal lengths and up to 30 distance lengths
+ * are themselves compressed using Huffman codes and run-length encoding. In
+ * the list of code lengths, a 0 symbol means no code, a 1..15 symbol means
+ * that length, and the symbols 16, 17, and 18 are run-length instructions.
+ * Each of 16, 17, and 18 are followed by extra bits to define the length of
+ * the run. 16 copies the last length 3 to 6 times. 17 represents 3 to 10
+ * zero lengths, and 18 represents 11 to 138 zero lengths. Unused symbols
+ * are common, hence the special coding for zero lengths.
+ *
+ * - The symbols for 0..18 are Huffman coded, and so that code must be
+ * described first. This is simply a sequence of up to 19 three-bit values
+ * representing no code (0) or the code length for that symbol (1..7).
+ *
+ * - A dynamic block starts with three fixed-size counts from which is computed
+ * the number of literal/length code lengths, the number of distance code
+ * lengths, and the number of code length code lengths (ok, you come up with
+ * a better name!) in the code descriptions. For the literal/length and
+ * distance codes, lengths after those provided are considered zero, i.e. no
+ * code. The code length code lengths are received in a permuted order (see
+ * the order[] array below) to make a short code length code length list more
+ * likely. As it turns out, very short and very long codes are less likely
+ * to be seen in a dynamic code description, hence what may appear initially
+ * to be a peculiar ordering.
+ *
+ * - Given the number of literal/length code lengths (nlen) and distance code
+ * lengths (ndist), then they are treated as one long list of nlen + ndist
+ * code lengths. Therefore run-length coding can and often does cross the
+ * boundary between the two sets of lengths.
+ *
+ * - So to summarize, the code description at the start of a dynamic block is
+ * three counts for the number of code lengths for the literal/length codes,
+ * the distance codes, and the code length codes. This is followed by the
+ * code length code lengths, three bits each. This is used to construct the
+ * code length code which is used to read the remainder of the lengths. Then
+ * the literal/length code lengths and distance lengths are read as a single
+ * set of lengths using the code length codes. Codes are constructed from
+ * the resulting two sets of lengths, and then finally you can start
+ * decoding actual compressed data in the block.
+ *
+ * - For reference, a "typical" size for the code description in a dynamic
+ * block is around 80 bytes.
+ */
+ local int32_t dynamic(struct state *s)
+ {
+     int32_t nlen, ndist, ncode;             /* number of lengths in descriptor */
+     int32_t index;                          /* index of lengths[] */
+     int32_t err;                            /* construct() return value */
+     int16_t lengths[MAXCODES];              /* descriptor code lengths */
+     int16_t lencnt[MAXBITS+1], lensym[MAXLCODES];   /* lencode memory */
+     int16_t distcnt[MAXBITS+1], distsym[MAXDCODES]; /* distcode memory */
+     struct huffman lencode = {lencnt, lensym};      /* length code */
+     struct huffman distcode = {distcnt, distsym};   /* distance code */
+     static const int16_t order[19] =    /* permutation of code length codes */
+         {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+     /* get number of lengths in each table, check lengths */
+     nlen = bits(s, 5) + 257;
+     ndist = bits(s, 5) + 1;
+     ncode = bits(s, 4) + 4;
+     if (nlen > MAXLCODES || ndist > MAXDCODES)
+         return -3;                      /* bad counts */
+
+     /* read code length code lengths (really), missing lengths are zero */
+     for (index = 0; index < ncode; index++)
+         lengths[order[index]] = bits(s, 3);
+     for (; index < 19; index++)
+         lengths[order[index]] = 0;
+
+     /* build huffman table for code lengths codes (use lencode temporarily) */
+     err = construct(&lencode, lengths, 19);
+     if (err != 0) return -4;            /* require complete code set here */
+
+     /* read length/literal and distance code length tables */
+     index = 0;
+     while (index < nlen + ndist) {
+         int32_t symbol;                 /* decoded value */
+         int32_t len;                    /* last length to repeat */
+
+         symbol = decode(s, &lencode);
+         /*
+          * construct() also returns zero when all 19 code length code
+          * lengths are zero, and decode() then fails with a negative value.
+          * Storing that value in lengths[] would make the construct() calls
+          * below index h->count[] with a negative subscript, so stop here.
+          */
+         if (symbol < 0)
+             return symbol;              /* invalid symbol */
+         if (symbol < 16)                /* length in 0..15 */
+             lengths[index++] = symbol;
+         else {                          /* repeat instruction */
+             len = 0;                    /* assume repeating zeros */
+             if (symbol == 16) {         /* repeat last length 3..6 times */
+                 if (index == 0) return -5;      /* no last length! */
+                 len = lengths[index - 1];       /* last length */
+                 symbol = 3 + bits(s, 2);
+             }
+             else if (symbol == 17)      /* repeat zero 3..10 times */
+                 symbol = 3 + bits(s, 3);
+             else                        /* == 18, repeat zero 11..138 times */
+                 symbol = 11 + bits(s, 7);
+             if (index + symbol > nlen + ndist)
+                 return -6;              /* too many lengths! */
+             while (symbol--)            /* repeat last or zero symbol times */
+                 lengths[index++] = len;
+         }
+     }
+
+     /* build huffman table for literal/length codes */
+     err = construct(&lencode, lengths, nlen);
+     if (err < 0 || (err > 0 && nlen - lencode.count[0] != 1))
+         return -7;      /* only allow incomplete codes if just one code */
+
+     /* build huffman table for distance codes */
+     err = construct(&distcode, lengths + nlen, ndist);
+     if (err < 0 || (err > 0 && ndist - distcode.count[0] != 1))
+         return -8;      /* only allow incomplete codes if just one code */
+
+     /* decode data until end-of-block code */
+     return codes(s, &lencode, &distcode);
+ }
+
+/*
+ * Inflate source to dest. On return, destlen and sourcelen are updated to the
+ * size of the uncompressed data and the size of the deflate data respectively.
+ * On success, the return value of puff() is zero. If there is an error in the
+ * source data, i.e. it is not in the deflate format, then a negative value is
+ * returned. If there is not enough input available or there is not enough
+ * output space, then a positive error is returned. In that case, destlen and
+ * sourcelen are not updated to facilitate retrying from the beginning with the
+ * provision of more input data or more output space. In the case of invalid
+ * inflate data (a negative error), destlen and sourcelen are still updated
+ * (to the amounts produced and consumed so far) to facilitate the debugging
+ * of deflators. dest and source themselves are passed by value and are never
+ * changed for the caller.
+ *
+ * puff() also has a mode to determine the size of the uncompressed output with
+ * no output written. For this dest must be (uint8_t *)0. In this case,
+ * the input value of *destlen is ignored, and on return *destlen is set to the
+ * size of the uncompressed output.
+ *
+ * The return codes are:
+ *
+ * 2: available inflate data did not terminate
+ * 1: output space exhausted before completing inflate
+ * 0: successful inflate
+ * -1: invalid block type (type == 3)
+ * -2: stored block length did not match one's complement
+ * -3: dynamic block code description: too many length or distance codes
+ * -4: dynamic block code description: code lengths codes incomplete
+ * -5: dynamic block code description: repeat lengths with no first length
+ * -6: dynamic block code description: repeat more than specified lengths
+ * -7: dynamic block code description: invalid literal/length code lengths
+ * -8: dynamic block code description: invalid distance code lengths
+ * -9: invalid literal/length or distance code in fixed or dynamic block
+ * -10: distance is too far back in fixed or dynamic block
+ *
+ * Format notes:
+ *
+ * - Three bits are read for each block to determine the kind of block and
+ * whether or not it is the last block. Then the block is decoded and the
+ * process repeated if it was not the last block.
+ *
+ * - The leftover bits in the last byte of the deflate data after the last
+ * block (if it was a fixed or dynamic block) are undefined and have no
+ * expected values to check.
+ */
+int32_t puff(uint8_t *dest, /* output buffer, or NULL to only measure the output size */
+ uint32_t *destlen, /* in: amount of output space; out: bytes produced */
+ uint8_t *source, /* deflate input data */
+ uint32_t *sourcelen) /* in: amount of input available; out: bytes consumed */
+{
+ struct state s; /* input/output state */
+ int32_t last, type; /* block information */
+ int32_t err; /* return value */
+
+ /* initialize output state */
+ s.out = dest;
+ s.outlen = *destlen; /* ignored if dest is NULL */
+ s.outcnt = 0;
+
+ /* initialize input state */
+ s.in = source;
+ s.inlen = *sourcelen;
+ s.incnt = 0;
+ s.bitbuf = 0;
+ s.bitcnt = 0;
+
+ /* return if bits() or decode() tries to read past available input */
+ if (setjmp(s.env) != 0) /* if came back here via longjmp() */
+ err = 2; /* then skip do-loop, return error */
+ else {
+ /* process blocks until last block or error */
+ do {
+ last = bits(&s, 1); /* one if last block */
+ type = bits(&s, 2); /* block type 0..3 */
+ err = type == 0 ? stored(&s) :
+ (type == 1 ? fixed(&s) :
+ (type == 2 ? dynamic(&s) :
+ -1)); /* type == 3, invalid */
+ if (err != 0) break; /* return with error */
+ } while (!last);
+ }
+
+ /* update the lengths and return */
+ /* only success (0) and format errors (< 0) report progress; positive */
+ /* errors leave *destlen and *sourcelen untouched so the caller can retry */
+ if (err <= 0) {
+ *destlen = s.outcnt;
+ *sourcelen = s.incnt;
+ }
+ return err;
+}
diff --git a/src/qcommon/puff.h b/src/qcommon/puff.h
new file mode 100644
index 00000000..14070f64
--- /dev/null
+++ b/src/qcommon/puff.h
@@ -0,0 +1,43 @@
+/*
+ * This is a modified version of Mark Adler's work,
+ * see below for the original copyright.
+ * 2006 - Joerg Dietrich <dietrich_joerg@gmx.de>
+ */
+
+/* puff.h
+ Copyright (C) 2002, 2003 Mark Adler, all rights reserved
+ version 1.7, 3 Mar 2002
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the author be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Mark Adler madler@alumni.caltech.edu
+ */
+
+#ifndef __PUFF_H
+#define __PUFF_H
+
+#include "q_shared.h" /* for definitions of the <stdint.h> types */
+
+/*
+ * See puff.c for purpose and usage.
+ * Returns 0 on success, a negative value for invalid deflate data and a
+ * positive value when input or output space ran out (details in puff.c).
+ */
+int32_t puff(uint8_t *dest, /* output buffer, or NULL to only measure the output size */
+ uint32_t *destlen, /* in: amount of output space; out: bytes produced */
+ uint8_t *source, /* deflate input data */
+ uint32_t *sourcelen); /* in: amount of input available; out: bytes consumed */
+
+#endif // __PUFF_H
diff --git a/src/qcommon/q_math.c b/src/qcommon/q_math.c
index 196d2f55..0973f31c 100644
--- a/src/qcommon/q_math.c
+++ b/src/qcommon/q_math.c
@@ -1086,6 +1086,53 @@ void AddPointToBounds( const vec3_t v, vec3_t mins, vec3_t maxs ) {
}
}
+/*
+ * Returns qtrue when the axis-aligned box [mins, maxs] overlaps or touches
+ * the box [mins2, maxs2], qfalse when they are separated along any axis.
+ */
+qboolean BoundsIntersect(const vec3_t mins, const vec3_t maxs,
+        const vec3_t mins2, const vec3_t maxs2)
+{
+    int axis;
+
+    for ( axis = 0; axis < 3; axis++ )
+    {
+        // a gap on a single axis is enough to rule out any overlap
+        if ( maxs[axis] < mins2[axis] || mins[axis] > maxs2[axis] )
+        {
+            return qfalse;
+        }
+    }
+
+    return qtrue;
+}
+
+/*
+ * Returns qtrue when the cube of half-size radius centred on origin (the
+ * sphere's bounding box) overlaps or touches the box [mins, maxs].
+ */
+qboolean BoundsIntersectSphere(const vec3_t mins, const vec3_t maxs,
+        const vec3_t origin, vec_t radius)
+{
+    int axis;
+
+    for ( axis = 0; axis < 3; axis++ )
+    {
+        // entirely above or entirely below the box on this axis
+        if ( origin[axis] - radius > maxs[axis] ||
+             origin[axis] + radius < mins[axis] )
+        {
+            return qfalse;
+        }
+    }
+
+    return qtrue;
+}
+
+/*
+ * Returns qtrue when origin lies inside or on the surface of the box
+ * [mins, maxs], qfalse otherwise.
+ */
+qboolean BoundsIntersectPoint(const vec3_t mins, const vec3_t maxs,
+        const vec3_t origin)
+{
+    int axis;
+
+    for ( axis = 0; axis < 3; axis++ )
+    {
+        if ( origin[axis] > maxs[axis] || origin[axis] < mins[axis] )
+        {
+            return qfalse;
+        }
+    }
+
+    return qtrue;
+}
vec_t VectorNormalize( vec3_t v ) {
// NOTE: TTimo - Apple G4 altivec source uses double?
diff --git a/src/qcommon/q_shared.h b/src/qcommon/q_shared.h
index 50a0b10c..2e3a153c 100644
--- a/src/qcommon/q_shared.h
+++ b/src/qcommon/q_shared.h
@@ -559,7 +559,7 @@ vec_t VectorLengthSquared( const vec3_t v );
vec_t Distance( const vec3_t p1, const vec3_t p2 );
vec_t DistanceSquared( const vec3_t p1, const vec3_t p2 );
-
+
void VectorNormalizeFast( vec3_t v );
void VectorInverse( vec3_t v );
@@ -593,6 +593,13 @@ void AxisCopy( vec3_t in[3], vec3_t out[3] );
void SetPlaneSignbits( struct cplane_s *out );
int BoxOnPlaneSide (vec3_t emins, vec3_t emaxs, struct cplane_s *plane);
+qboolean BoundsIntersect(const vec3_t mins, const vec3_t maxs,
+ const vec3_t mins2, const vec3_t maxs2);
+qboolean BoundsIntersectSphere(const vec3_t mins, const vec3_t maxs,
+ const vec3_t origin, vec_t radius);
+qboolean BoundsIntersectPoint(const vec3_t mins, const vec3_t maxs,
+ const vec3_t origin);
+
float AngleMod(float a);
float LerpAngle (float from, float to, float frac);
float AngleSubtract( float a1, float a2 );
diff --git a/src/qcommon/vm_x86.c b/src/qcommon/vm_x86.c
index c0a703bc..d298b755 100644
--- a/src/qcommon/vm_x86.c
+++ b/src/qcommon/vm_x86.c
@@ -213,7 +213,7 @@ void callAsmCall(void)
// arbitrarily named (though this is not true for the MSC version). When a vm
// makes a system call, control jumps straight to the doAsmCall label.
void AsmCall( void ) {
- asm( CMANG(doAsmCall) ": \n\t" \
+ __asm__( CMANG(doAsmCall) ": \n\t" \
" movl (%%edi),%%eax \n\t" \
" subl $4,%%edi \n\t" \
" orl %%eax,%%eax \n\t" \
diff --git a/src/qcommon/vm_x86_64.c b/src/qcommon/vm_x86_64.c
index e8e827e5..814acfef 100644
--- a/src/qcommon/vm_x86_64.c
+++ b/src/qcommon/vm_x86_64.c
@@ -29,9 +29,15 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <sys/time.h>
+#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
+#include <stdarg.h>
+
+//#define USE_GAS
+//#define DEBUG_VM
#ifdef DEBUG_VM
#define Dfprintf(fd, args...) fprintf(fd, ##args)
@@ -40,6 +46,19 @@ static FILE* qdasmout;
#define Dfprintf(args...)
#endif
+#define VM_X86_64_MMAP
+
+#ifndef USE_GAS
+void assembler_set_output(char* buf);
+size_t assembler_get_code_size(void);
+void assembler_init(int pass);
+void assemble_line(const char* input, size_t len);
+#ifdef Dfprintf
+#undef Dfprintf
+#define Dfprintf(args...)
+#endif
+#endif // USE_GAS
+
static void VM_Destroy_Compiled(vm_t* self);
/*
@@ -207,8 +226,29 @@ static unsigned char op_argsize[256] =
[OP_BLOCK_COPY] = 4,
};
+#ifdef USE_GAS
#define emit(x...) \
do { fprintf(fh_s, ##x); fputc('\n', fh_s); } while(0)
+#else
+/*
+ * printf-style front end of the built-in assembler (used when USE_GAS is not
+ * defined): formats one line of assembly text and hands it to
+ * assemble_line(). Because fmt goes through vsnprintf, a literal '%' in the
+ * assembly (register names) must be written as "%%".
+ */
+void emit(const char* fmt, ...)
+{
+ va_list ap;
+ char line[4096];
+ va_start(ap, fmt);
+ /* output longer than sizeof(line)-1 characters is truncated, not reported */
+ vsnprintf(line, sizeof(line), fmt, ap);
+ va_end(ap);
+ assemble_line(line, strlen(line));
+}
+#endif // USE_GAS
+
+#ifdef USE_GAS
+#define JMPIARG \
+ emit("jmp i_%08x", iarg);
+#else
+/*
+ * Built-in assembler variant: jump to VM instruction iarg through an absolute
+ * address loaded into rax. emit() formats its argument with vsnprintf, so the
+ * register must be spelled "%%rax" and the address is cast to match "%lu".
+ */
+#define JMPIARG \
+    emit("movq $%lu, %%rax", (unsigned long)(vm->codeBase+vm->instructionPointers[iarg])); \
+    emit("jmpq *%%rax");
+#endif
// integer compare and jump
#define IJ(op) \
@@ -216,7 +256,8 @@ static unsigned char op_argsize[256] =
emit("movl 4(%%rsi), %%eax"); \
emit("cmpl 8(%%rsi), %%eax"); \
emit(op " i_%08x", instruction+1); \
- emit("jmp i_%08x", iarg);
+ JMPIARG \
+ neednilabel = 1;
#ifdef USE_X87
#define FJ(bits, op) \
@@ -226,7 +267,8 @@ static unsigned char op_argsize[256] =
emit("fnstsw %%ax");\
emit("testb $" #bits ", %%ah");\
emit(op " i_%08x", instruction+1);\
- emit("jmp i_%08x", iarg);
+ JMPIARG \
+ neednilabel = 1;
#define XJ(x)
#else
#define FJ(x, y)
@@ -236,7 +278,8 @@ static unsigned char op_argsize[256] =
emit("ucomiss 8(%%rsi), %%xmm0");\
emit("jp i_%08x", instruction+1);\
emit(op " i_%08x", instruction+1);\
- emit("jmp i_%08x", iarg);
+ JMPIARG \
+ neednilabel = 1;
#endif
#define SIMPLE(op) \
@@ -293,9 +336,14 @@ static unsigned char op_argsize[256] =
static void* getentrypoint(vm_t* vm)
{
+#ifdef USE_GAS
return vm->codeBase+64; // skip ELF header
+#else
+ return vm->codeBase;
+#endif // USE_GAS
}
+#ifdef USE_GAS
char* mmapfile(const char* fn, size_t* size)
{
int fd = -1;
@@ -383,6 +431,7 @@ static int doas(char* in, char* out, unsigned char** compiledcode)
return size;
}
+#endif // USE_GAS
static void block_copy_vm(unsigned dest, unsigned src, unsigned count)
{
@@ -411,8 +460,13 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
char* code;
unsigned iarg = 0;
unsigned char barg = 0;
- void* entryPoint;
+ int neednilabel = 0;
+ struct timeval tvstart = {0, 0};
+#ifdef USE_GAS
+ byte* compiledcode;
+ int compiledsize;
+ void* entryPoint;
char fn_s[2*MAX_QPATH]; // output file for assembler code
char fn_o[2*MAX_QPATH]; // file written by as
#ifdef DEBUG_VM
@@ -420,16 +474,16 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
#endif
FILE* fh_s;
int fd_s, fd_o;
- byte* compiledcode;
- int compiledsize;
+
+ gettimeofday(&tvstart, NULL);
Com_Printf("compiling %s\n", vm->name);
#ifdef DEBUG_VM
snprintf(fn_s, sizeof(fn_s), "%.63s.s", vm->name);
snprintf(fn_o, sizeof(fn_o), "%.63s.o", vm->name);
- fd_s = open(fn_s, O_CREAT|O_WRONLY, 0644);
- fd_o = open(fn_o, O_CREAT|O_WRONLY, 0644);
+ fd_s = open(fn_s, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+ fd_o = open(fn_o, O_CREAT|O_WRONLY|O_TRUNC, 0644);
#else
snprintf(fn_s, sizeof(fn_s), "/tmp/%.63s.s_XXXXXX", vm->name);
snprintf(fn_o, sizeof(fn_o), "/tmp/%.63s.o_XXXXXX", vm->name);
@@ -463,25 +517,50 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
return;
}
- // translate all instructions
- pc = 0;
- code = (char *)header + header->codeOffset;
-
emit("start:");
emit("or %%r8, %%r8"); // check whether to set up instruction pointers
emit("jnz main");
emit("jmp setupinstructionpointers");
emit("main:");
+#else // USE_GAS
+ int pass;
+ size_t compiledOfs = 0;
+
+ gettimeofday(&tvstart, NULL);
+
+ for (pass = 0; pass < 2; ++pass) {
+
+ if(pass)
+ {
+ compiledOfs = assembler_get_code_size();
+ vm->codeLength = compiledOfs;
+ vm->codeBase = mmap(NULL, compiledOfs, PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+ if(vm->codeBase == (void*)-1)
+ Com_Error(ERR_DROP, "VM_CompileX86: can't mmap memory");
+
+ assembler_set_output((char*)vm->codeBase);
+ }
+
+ assembler_init(pass);
+
+#endif // USE_GAS
+
+ // translate all instructions
+ pc = 0;
+ code = (char *)header + header->codeOffset;
for ( instruction = 0; instruction < header->instructionCount; ++instruction )
{
op = code[ pc ];
++pc;
- vm->instructionPointers[instruction] = pc;
+#ifndef USE_GAS
+ vm->instructionPointers[instruction] = assembler_get_code_size();
+#endif
-#if 0
+ /* store current instruction number in r15 for debugging */
+#if 1
emit("nop");
emit("movq $%d, %%r15", instruction);
emit("nop");
@@ -502,7 +581,17 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
{
Dfprintf(qdasmout, "%s\n", opnames[op]);
}
+
+#ifdef USE_GAS
emit("i_%08x:", instruction);
+#else
+ if(neednilabel)
+ {
+ emit("i_%08x:", instruction);
+ neednilabel = 0;
+ }
+#endif
+
switch ( op )
{
case OP_UNDEF:
@@ -561,6 +650,7 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
// emit("frstor 4(%%rsi)");
emit("addq $4, %%rsi");
emit("movl %%eax, (%%rsi)"); // store return value
+ neednilabel = 1;
break;
case OP_PUSH:
emit("addq $4, %%rsi");
@@ -629,7 +719,8 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
emit("jp dojump_i_%08x", instruction);
emit("jz i_%08x", instruction+1);
emit("dojump_i_%08x:", instruction);
- emit("jmp i_%08x", iarg);
+ JMPIARG
+ neednilabel = 1;
#endif
break;
case OP_LTF:
@@ -856,7 +947,7 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
}
}
-
+#ifdef USE_GAS
emit("setupinstructionpointers:");
emit("movq $%lu, %%rax", (unsigned long)vm->instructionPointers);
for ( instruction = 0; instruction < header->instructionCount; ++instruction )
@@ -889,8 +980,17 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
vm->codeBase = compiledcode; // remember to skip ELF header!
vm->codeLength = compiledsize;
+#else // USE_GAS
+ }
+ assembler_init(0);
+
+ if(mprotect(vm->codeBase, compiledOfs, PROT_READ|PROT_EXEC))
+ Com_Error(ERR_DROP, "VM_CompileX86: mprotect failed");
+#endif // USE_GAS
+
vm->destroy = VM_Destroy_Compiled;
+#ifdef USE_GAS
entryPoint = getentrypoint(vm);
// __asm__ __volatile__ ("int3");
@@ -911,8 +1011,6 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
fclose(qdasmout);
#endif
- Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n", vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength );
-
out:
close(fd_o);
@@ -923,12 +1021,30 @@ out:
unlink(fn_s);
}
#endif
+#endif // USE_GAS
+
+ if(vm->compiled)
+ {
+ struct timeval tvdone = {0, 0};
+ struct timeval dur = {0, 0};
+ Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n", vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength );
+
+ gettimeofday(&tvdone, NULL);
+ timersub(&tvdone, &tvstart, &dur);
+ Com_Printf( "compilation took %lu.%06lu seconds\n", dur.tv_sec, dur.tv_usec );
+ }
}
void VM_Destroy_Compiled(vm_t* self)
{
+#ifdef USE_GAS
munmap(self->codeBase, self->codeLength);
+#elif _WIN32
+ VirtualFree(self->codeBase, self->codeLength, MEM_RELEASE);
+#else
+ munmap(self->codeBase, self->codeLength);
+#endif
}
/*
diff --git a/src/qcommon/vm_x86_64_assembler.c b/src/qcommon/vm_x86_64_assembler.c
new file mode 100644
index 00000000..1eda764f
--- /dev/null
+++ b/src/qcommon/vm_x86_64_assembler.c
@@ -0,0 +1,1419 @@
+/*
+===========================================================================
+vm_x86_64_assembler.c -- assembler for x86-64
+
+Copyright (C) 2007 Ludwig Nussel <ludwig.nussel@suse.de>, Novell inc.
+
+Quake III Arena source code is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Quake III Arena source code is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Quake III Arena source code; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+===========================================================================
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+/* fixed-width shorthands; u64 relies on unsigned long being 64 bits wide */
+/* (emit8() shifts a u64 right by 32) */
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long u64;
+
+/* output buffer that emit1() stores into while assembler_pass is non-zero */
+static char* out;
+/* number of bytes emitted so far; advanced by emit1() in every pass */
+static unsigned compiledOfs;
+/* zero: emit1() only counts bytes; non-zero: bytes are written to out */
+static unsigned assembler_pass;
+
+/* line appended to the diagnostic printed by _crap() when non-empty */
+static const char* cur_line;
+
+/* optional stream that receives a copy of every byte written by emit1() */
+static FILE* fout;
+
+/* classic min/max macros; each argument may be evaluated twice */
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+/* report a fatal assembler error, tagged with the calling function's name */
+#define crap(fmt, args...) do { \
+ _crap(__FUNCTION__, fmt, ##args); \
+} while(0)
+
+/* expects arg_t variables named arg1 and arg2 in scope; note that the */
+/* expansion already ends in a semicolon */
+#define CRAP_INVALID_ARGS crap("invalid arguments %s, %s", argtype2str(arg1.type),argtype2str(arg2.type));
+
+/* debug() prints only when built with DEBUG, otherwise it expands to nothing */
+#ifdef DEBUG
+#define debug(fmt, args...) printf(fmt, ##args)
+#else
+#define debug(fmt, args...)
+#endif
+
+/*
+ * Fatal error reporter behind the crap() macro: prints "func() - message" to
+ * stderr, then the line currently being assembled when there is one, and
+ * terminates the process with exit status 1.
+ */
+static void _crap(const char* func, const char* fmt, ...)
+{
+    va_list args;
+
+    fprintf(stderr, "%s() - ", func);
+
+    va_start(args, fmt);
+    vfprintf(stderr, fmt, args);
+    va_end(args);
+
+    fputc('\n', stderr);
+
+    if(cur_line != NULL && *cur_line != '\0')
+    {
+        fprintf(stderr, "-> %s\n", cur_line);
+    }
+
+    exit(1);
+}
+
+/*
+ * Emit a single byte. While assembler_pass is zero only the size counter
+ * advances; otherwise the byte is stored in out[], mirrored to fout when
+ * that stream is set, and traced through debug().
+ */
+static void emit1(unsigned char v)
+{
+    if(!assembler_pass)
+    {
+        // sizing pass: nothing is written
+        ++compiledOfs;
+        return;
+    }
+
+    out[compiledOfs++] = v;
+    if(fout) fwrite(&v, 1, 1, fout);
+    debug("%02hhx ", v);
+}
+
+/* emit a 16 bit value, least significant byte first */
+static inline void emit2(u16 v)
+{
+    unsigned shift;
+
+    for(shift = 0; shift < 16; shift += 8)
+        emit1((v >> shift) & 0xFF);
+}
+
+/* emit a 32 bit value, least significant byte first */
+static inline void emit4(u32 v)
+{
+    unsigned shift;
+
+    for(shift = 0; shift < 32; shift += 8)
+        emit1((v >> shift) & 0xFF);
+}
+
+/* emit a 64 bit value as two little-endian 32 bit halves, low half first */
+static inline void emit8(u64 v)
+{
+    u32 low = v & 0xFFFFFFFF;
+    u32 high = (v >> 32) & 0xFFFFFFFF;
+
+    emit4(low);
+    emit4(high);
+}
+
+/* flag bits of the REX prefix; the code that emits the prefix ORs in 0x40 */
+enum {
+ REX_W = 0x08,
+ REX_R = 0x04,
+ REX_X = 0x02,
+ REX_B = 0x01,
+};
+
+/* values for the mod field (top two bits) of the ModRM byte, plus the rm */
+/* value that announces a following SIB byte */
+enum {
+ MODRM_MOD_00 = 0x00,
+ MODRM_MOD_01 = 0x01 << 6,
+ MODRM_MOD_10 = 0x02 << 6,
+ MODRM_MOD_11 = 0x03 << 6,
+ MODRM_RM_SIB = 0x04,
+};
+
+/* kind of an instruction operand; T_NONE marks an absent operand */
+/* NOTE(review): T_ABSOLUTE has no case in argtype2str() */
+typedef enum
+{
+ T_NONE = 0x00,
+ T_REGISTER = 0x01,
+ T_IMMEDIATE = 0x02,
+ T_MEMORY = 0x04,
+ T_LABEL = 0x08,
+ T_ABSOLUTE = 0x80
+} argtype_t;
+
+/*
+ * Register identifiers: the low four bits (R_MGP) hold the register number,
+ * the R_8/R_16/R_64 flags give the operand width (no flag means 32 bit) and
+ * R_XMM marks an xmm register.
+ */
+typedef enum {
+ R_8 = 0x100,
+ R_16 = 0x200,
+ R_64 = 0x800,
+ R_MSZ = 0xF00, // size mask
+ R_XMM = 0x2000, // flag marking an xmm register
+ R_EAX = 0x00,
+ R_EBX = 0x03,
+ R_ECX = 0x01,
+ R_EDX = 0x02,
+ R_ESI = 0x06,
+ R_EDI = 0x07,
+ R_ESP = 0x04,
+ R_RAX = R_EAX | R_64,
+ R_RBX = R_EBX | R_64,
+ R_RCX = R_ECX | R_64,
+ R_RDX = R_EDX | R_64,
+ R_RSI = R_ESI | R_64,
+ R_RDI = R_EDI | R_64,
+ R_RSP = R_ESP | R_64,
+ R_R8 = 0x08 | R_64,
+ R_R9 = 0x09 | R_64,
+ R_R10 = 0x0A | R_64,
+ R_R15 = 0x0F | R_64,
+ R_AL = R_EAX | R_8,
+ R_AX = R_EAX | R_16,
+ R_CL = R_ECX | R_8,
+ R_XMM0 = 0x00 | R_XMM,
+ R_MGP = 0x0F, // mask for general purpose registers
+} reg_t;
+
+/* NOTE(review): not referenced by the code visible in this part of the file */
+typedef enum {
+ MODRM_SIB = 0,
+ MODRM_NOSIB = 0x3,
+} modrm_sib_t;
+
+/* memory operand made of a displacement, a base, an optional index and a */
+/* scale (compute_rexmodrmsib() encodes scales 1, 2, 4 and 8) */
+typedef struct {
+ unsigned disp;
+ argtype_t basetype;
+ union {
+ u64 imm;
+ reg_t reg;
+ } base;
+ argtype_t indextype; /* T_NONE when there is no index register */
+ union {
+ u64 imm;
+ reg_t reg;
+ } index;
+ unsigned scale;
+} memref_t;
+
+/* size of the label buffer in arg_t */
+#define LABELLEN 32
+
+/*
+ * One parsed instruction operand. type selects the valid member of v.
+ * Note that v.reg shares storage with v.mem.disp, so v.reg must only be read
+ * when type is T_REGISTER.
+ */
+typedef struct {
+    argtype_t type;
+    union {
+        u64 imm;
+        reg_t reg;
+        memref_t mem;
+        char label[LABELLEN];
+    } v;
+    /* unsigned: a signed one-bit field can only hold 0 and -1, never 1 */
+    unsigned absolute:1;
+} arg_t;
+
+/* encoder callback: receives the mnemonic, both operands (T_NONE when */
+/* absent) and the per-mnemonic data pointer stored in op_t */
+typedef void (*emitfunc)(const char* op, arg_t arg1, arg_t arg2, void* data);
+
+/* pairs a mnemonic with its encoder and the data handed to that encoder */
+typedef struct {
+ char* mnemonic;
+ emitfunc func;
+ void* data;
+} op_t;
+
+/* opcode parameters that encoders such as emit_op_rm() and emit_subaddand() */
+/* receive through their data pointer */
+typedef struct {
+ u8 xmmprefix;
+ u8 subcode; // in modrm
+ u8 rmcode; // opcode for reg/mem, reg
+ u8 mrcode; // opcode for reg, reg/mem
+ u8 rcode8; // opcode for reg8/mem8
+ u8 rcode; // opcode for reg/mem
+} opparam_t;
+
+/* ************************* */
+
+/*
+ * Hash at most len characters of string (stopping at the terminator): each
+ * character is weighted by 119 plus its position, then the sum is folded
+ * with two right shifts.
+ */
+static unsigned hashkey(const char *string, unsigned len) {
+    unsigned hash = 0;
+    unsigned pos = 0;
+
+    while (pos < len && string[pos] != '\0') {
+        hash += string[pos] * (119 + pos);
+        ++pos;
+    }
+
+    return hash ^ (hash >> 10) ^ (hash >> 20);
+}
+
+/* one label definition; entries that hash to the same bucket are chained */
+/* through next */
+struct hashentry {
+ char* label; /* heap copy of the name, released in labelhash_free() */
+ unsigned address;
+ struct hashentry* next;
+};
+/* bucket heads, indexed by hashkey(label) modulo the table size */
+static struct hashentry* labelhash[1021];
+
+/*
+ * Record label at address by pushing a new entry onto the front of its hash
+ * bucket. There is no duplicate check: adding a name twice leaves both
+ * entries in the chain and lookup_label() returns the most recent one.
+ * Allocation failure is fatal (reported through crap()).
+ */
+static void hash_add_label(const char* label, unsigned address)
+{
+    struct hashentry* h;
+    unsigned i = hashkey(label, -1U);
+    i %= sizeof(labelhash)/sizeof(labelhash[0]);
+
+    h = malloc(sizeof(*h));
+    if(!h)
+        crap("out of memory");
+
+    h->label = strdup(label);
+    if(!h->label)
+        crap("out of memory");
+
+    h->address = address;
+    h->next = labelhash[i];
+    labelhash[i] = h;
+}
+
+/*
+ * Return the address recorded for label. An unknown label yields 0 while
+ * assembler_pass is zero and is a fatal error otherwise.
+ */
+static unsigned lookup_label(const char* label)
+{
+    unsigned bucket = hashkey(label, -1U) % (sizeof(labelhash)/sizeof(labelhash[0]));
+    struct hashentry* entry = labelhash[bucket];
+
+    while(entry)
+    {
+        if(strcmp(entry->label, label) == 0)
+            return entry->address;
+        entry = entry->next;
+    }
+
+    if(assembler_pass)
+        crap("label %s undefined", label);
+    return 0;
+}
+
+/*
+ * Release every label entry, print occupancy statistics of the hash table to
+ * stdout and reset all buckets to empty.
+ */
+static void labelhash_free(void)
+{
+    struct hashentry* h;
+    unsigned i;
+    unsigned z = 0, min = -1U, max = 0, t = 0;
+    for ( i = 0; i < sizeof(labelhash)/sizeof(labelhash[0]); ++i)
+    {
+        unsigned n = 0;
+        h = labelhash[i];
+        while(h)
+        {
+            struct hashentry* next = h->next;
+            free(h->label);
+            free(h);
+            h = next;
+            ++n;
+        }
+        t+=n;
+        if(!n) ++z;
+        //else printf("%u\n", n);
+        min = MIN(min, n);
+        max = MAX(max, n);
+    }
+    // the element count is a size_t; cast it so it matches the %lu conversion
+    printf("total %u, hsize %lu, zero %u, min %u, max %u\n", t,
+        (unsigned long)(sizeof(labelhash)/sizeof(labelhash[0])), z, min, max);
+    memset(labelhash, 0, sizeof(labelhash));
+}
+
+/* ************************* */
+
+
+/*
+ * Name of an operand type for diagnostics. Any value without a case below
+ * is reported as a fatal error through crap().
+ */
+static const char* argtype2str(argtype_t t)
+{
+    const char* name = NULL;
+
+    switch(t)
+    {
+        case T_NONE:      name = "none"; break;
+        case T_REGISTER:  name = "register"; break;
+        case T_IMMEDIATE: name = "immediate"; break;
+        case T_MEMORY:    name = "memory"; break;
+        case T_LABEL:     name = "label"; break;
+        default: crap("invalid type");
+    }
+
+    /* name is still NULL only if crap() were to return */
+    return name;
+}
+
+/* ************************* */
+
+/*
+ * Range predicates for immediates and displacements. The issN() variants
+ * treat v as a two's complement 64 bit value and test whether it fits a
+ * signed N bit field, i.e. -2^(N-1) <= v <= 2^(N-1)-1; adding the bias
+ * 2^(N-1) in unsigned arithmetic maps exactly that range onto 0..2^N-1.
+ * The isuN() variants test the unsigned range 0..2^N-1.
+ */
+static inline int iss8(u64 v)
+{
+    return ((v + 0x80UL) <= 0xffUL);
+}
+
+static inline int isu8(u64 v)
+{
+    return (v <= 0xff);
+}
+
+static inline int iss16(u64 v)
+{
+    return ((v + 0x8000UL) <= 0xffffUL);
+}
+
+static inline int isu16(u64 v)
+{
+    return (v <= 0xffff);
+}
+
+static inline int iss32(u64 v)
+{
+    return ((v + 0x80000000UL) <= 0xffffffffUL);
+}
+
+static inline int isu32(u64 v)
+{
+    return (v <= 0xffffffff);
+}
+
+/*
+ * Encoder for instructions that consist of a single opcode byte and take no
+ * operands; the opcode is carried in the data pointer.
+ */
+static void emit_opsingle(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+    if(!(arg1.type == T_NONE && arg2.type == T_NONE))
+        CRAP_INVALID_ARGS;
+
+    emit1((u8)((unsigned long) data));
+}
+
+/*
+ * Same as emit_opsingle(), preceded by the 0x66 prefix byte that the other
+ * encoders in this file emit for 16 bit operands.
+ */
+static void emit_opsingle16(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+    const u8 prefix16 = 0x66;
+
+    emit1(prefix16);
+    emit_opsingle(mnemonic, arg1, arg2, data);
+}
+
+/*
+ * Work out the REX prefix, ModRM byte and SIB byte for an operand pair.
+ * arg1 supplies the ModRM reg field when it is a register; arg2 supplies the
+ * rm field (register) or the memory reference (base/index/scale/disp).
+ * Results are stored through rex_r, modrm_r and sib_r; *rex_r is 0 when no
+ * prefix is needed. Nothing is emitted here - the caller writes the bytes
+ * and any displacement.
+ */
+static void compute_rexmodrmsib(u8* rex_r, u8* modrm_r, u8* sib_r, arg_t* arg1, arg_t* arg2)
+{
+ u8 rex = 0;
+ u8 modrm = 0;
+ u8 sib = 0;
+
+ /* two general purpose registers must agree in width; xmm operands are exempt */
+ if((arg1->type == T_REGISTER && arg2->type == T_REGISTER)
+ && ((arg1->v.reg & R_MSZ) != (arg2->v.reg & R_MSZ))
+ && !((arg1->v.reg & R_XMM) || (arg2->v.reg & R_XMM)))
+ crap("both registers must be of same width");
+
+ /* any 64 bit register operand requests REX.W */
+ if((arg1->type == T_REGISTER && arg1->v.reg & R_64)
+ || (arg2->type == T_REGISTER && arg2->v.reg & R_64))
+ {
+ rex |= REX_W;
+ }
+
+ if(arg1->type == T_REGISTER)
+ {
+ /* register numbers 8..15 need the REX.R extension bit */
+ if((arg1->v.reg & R_MGP) > 0x07)
+ rex |= REX_R;
+
+ modrm |= (arg1->v.reg & 0x07) << 3;
+ }
+
+ if(arg2->type == T_REGISTER)
+ {
+ if((arg2->v.reg & R_MGP) > 0x07)
+ rex |= REX_B;
+
+ modrm |= (arg2->v.reg & 0x07);
+ }
+
+ if(arg2->type == T_MEMORY)
+ {
+ if((arg2->v.mem.basetype == T_REGISTER && !(arg2->v.mem.base.reg & R_64))
+ || (arg2->v.mem.indextype == T_REGISTER && !(arg2->v.mem.index.reg & R_64)))
+ {
+ crap("only 64bit base/index registers are %x %x", arg2->v.mem.base.reg, arg2->v.mem.index.reg);
+ }
+
+ if(arg2->v.mem.indextype == T_REGISTER)
+ {
+ /* base + index*scale form: rm announces a SIB byte */
+ modrm |= MODRM_RM_SIB;
+ /* mod selects the displacement size: none, 8 bit or 32 bit */
+ if(!arg2->v.mem.disp)
+ {
+ modrm |= MODRM_MOD_00;
+ }
+ else if(iss8(arg2->v.mem.disp))
+ {
+ modrm |= MODRM_MOD_01;
+ }
+ else if(isu32(arg2->v.mem.disp))
+ {
+ modrm |= MODRM_MOD_10;
+ }
+ else
+ {
+ crap("invalid displacement");
+ }
+
+ if((arg2->v.mem.index.reg & R_MGP) > 0x07)
+ rex |= REX_X;
+
+ /* NOTE(review): base.reg is read here before basetype is verified below */
+ if((arg2->v.mem.base.reg & R_MGP) > 0x07)
+ rex |= REX_B;
+
+ if(arg2->v.mem.basetype != T_REGISTER)
+ crap("base must be register");
+ /* NOTE(review): no default case - any other scale is encoded like 1 */
+ switch(arg2->v.mem.scale)
+ {
+ case 1: break;
+ case 2: sib |= 1 << 6; break;
+ case 4: sib |= 2 << 6; break;
+ case 8: sib |= 3 << 6; break;
+ }
+ sib |= (arg2->v.mem.index.reg & 0x07) << 3;
+ sib |= (arg2->v.mem.base.reg & 0x07);
+ }
+ else if(arg2->v.mem.indextype == T_NONE)
+ {
+ /* plain base (+ displacement) form, no SIB byte */
+ if(!arg2->v.mem.disp)
+ {
+ modrm |= MODRM_MOD_00;
+ }
+ else if(iss8(arg2->v.mem.disp))
+ {
+ modrm |= MODRM_MOD_01;
+ }
+ else if(isu32(arg2->v.mem.disp))
+ {
+ modrm |= MODRM_MOD_10;
+ }
+ else
+ {
+ crap("invalid displacement");
+ }
+
+ if(arg2->v.mem.basetype != T_REGISTER)
+ crap("todo: base != register");
+
+ if((arg2->v.mem.base.reg & R_MGP) > 0x07)
+ rex |= REX_B;
+
+ /* NOTE(review): a base whose low three bits are 4 yields rm == */
+ /* MODRM_RM_SIB although no SIB byte was computed - confirm callers */
+ /* never use such a base without an index */
+ modrm |= arg2->v.mem.base.reg & 0x07;
+ }
+ else
+ {
+ crap("invalid indextype");
+ }
+ }
+ else
+ {
+ /* no memory operand: register-direct addressing */
+ modrm |= MODRM_MOD_11;
+ }
+
+ /* a REX prefix is only produced when at least one of its flag bits is set */
+ if(rex)
+ rex |= 0x40; // XXX
+
+ *rex_r = rex;
+ *modrm_r = modrm;
+ *sib_r = sib;
+}
+
+/*
+ * Emit the displacement of a memory operand: nothing for non-memory operands
+ * or a zero displacement, one byte when iss8() accepts the value, four bytes
+ * when isu32() accepts it.
+ */
+static void maybe_emit_displacement(arg_t* arg)
+{
+    unsigned disp;
+
+    if(arg->type != T_MEMORY)
+        return;
+
+    disp = arg->v.mem.disp;
+    if(!disp)
+        return;
+
+    if(iss8(disp))
+        emit1((u8)disp);
+    else if(isu32(disp))
+        emit4(disp);
+    else
+        crap("invalid displacement");
+}
+
+/*
+ * One byte operator with the register number added to the opcode. The base
+ * opcode comes in through data; registers 8..15 get a REX.B prefix.
+ */
+static void emit_opreg(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+    u8 base_opcode = (u8)((unsigned long) data);
+
+    if(!(arg1.type == T_REGISTER && arg2.type == T_NONE))
+        CRAP_INVALID_ARGS;
+
+    if((arg1.v.reg & R_MGP) > 0x07)
+        emit1(0x40 | REX_B);
+
+    emit1(base_opcode | (arg1.v.reg & 0x07));
+}
+
+/*
+ * Operator which operates on a single reg/mem operand. data points to an
+ * opparam_t: subcode goes into the ModRM reg field, rcode8 is the opcode for
+ * 8 bit registers and rcode the opcode for everything else.
+ */
+static void emit_op_rm(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+    u8 rex, modrm, sib;
+    opparam_t* params = data;
+    int is_reg;
+
+    if((arg1.type != T_REGISTER && arg1.type != T_MEMORY) || arg2.type != T_NONE)
+        CRAP_INVALID_ARGS;
+
+    /*
+     * v.reg shares storage with v.mem.disp, so the width flags may only be
+     * inspected for register operands; otherwise a displacement with bit
+     * 0x100 or 0x200 set would select the 8 bit opcode or the 0x66 prefix.
+     */
+    is_reg = (arg1.type == T_REGISTER);
+
+    /* the operand is passed as second argument so that it fills the rm field */
+    compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+    modrm |= params->subcode << 3;
+
+    if(is_reg && (arg1.v.reg & R_16))
+        emit1(0x66);
+
+    if(rex) emit1(rex);
+    if(is_reg && (arg1.v.reg & R_8))
+        emit1(params->rcode8); // op reg8/mem8,
+    else
+        emit1(params->rcode); // op reg/mem,
+    emit1(modrm);
+    if((modrm & 0x07) == MODRM_RM_SIB)
+        emit1(sib);
+
+    maybe_emit_displacement(&arg1);
+}
+
+/*
+ * Operator which operates on a register with cl as the first operand.
+ * arg1 must be the cl register and arg2 the register operated on; data
+ * points to an opparam_t (subcode, rcode8, rcode).
+ */
+static void emit_op_rm_cl(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+    u8 rex, modrm, sib;
+    opparam_t* params = data;
+
+    if(arg2.type != T_REGISTER || arg1.type != T_REGISTER)
+        CRAP_INVALID_ARGS;
+
+    /* reject anything that is not register number ecx *or* not 8 bit wide; */
+    /* with && registers such as %al or %ecx slipped through unnoticed */
+    if((arg1.v.reg & R_MGP) != R_ECX || !(arg1.v.reg & R_8))
+        crap("only cl register is valid");
+
+    arg1.type = T_NONE; // don't complain, we know it's cl anyways
+
+    compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+    modrm |= params->subcode << 3;
+
+    if(arg2.v.reg & R_16)
+        emit1(0x66);
+
+    if(rex) emit1(rex);
+    if(arg2.v.reg & R_8)
+        emit1(params->rcode8); // op reg8/mem8,
+    else
+        emit1(params->rcode); // op reg/mem,
+    emit1(modrm);
+    if((modrm & 0x07) == MODRM_RM_SIB)
+        emit1(sib);
+
+    maybe_emit_displacement(&arg2);
+}
+
+/*
+ * Encode "mov arg1, arg2" (source first). Supported forms:
+ *   immediate -> register   (opcode 0xb0/0xb8 + register number)
+ *   immediate -> memory     (opcode 0xc7, 32 bit immediate)
+ *   register  -> register   (opcode 0x89)
+ *   register  -> memory     (opcode 0x88/0x89)
+ *   memory    -> register   (opcode 0x8a/0x8b)
+ * Any other operand combination is a fatal error. mnemonic and data are not
+ * used.
+ */
+static void emit_mov(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+    u8 rex = 0;
+    u8 modrm = 0;
+    u8 sib = 0;
+
+    if(arg1.type == T_IMMEDIATE && arg2.type == T_REGISTER)
+    {
+        u8 op = 0xb8;
+
+        if(arg2.v.reg & R_8)
+        {
+            if(!isu8(arg1.v.imm))
+                crap("value too large for 8bit register");
+
+            op = 0xb0;
+        }
+        else if(arg2.v.reg & R_16)
+        {
+            if(!isu16(arg1.v.imm))
+                crap("value too large for 16bit register");
+            emit1(0x66);
+        }
+        else if(!(arg2.v.reg & R_64))
+        {
+            /*
+             * 32 bit destination. The original "!arg2.v.reg & R_64" was
+             * always zero, so this check never ran. Accept every value that
+             * fits in 32 bits, whether written unsigned or negative.
+             */
+            if(!isu32(arg1.v.imm) && !iss32(arg1.v.imm))
+                crap("value too large for 32bit register");
+        }
+
+        compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+        if(rex) emit1(rex);
+
+        op |= (arg2.v.reg & 0x07);
+
+        emit1(op);
+
+        if(arg2.v.reg & R_8) emit1(arg1.v.imm);
+        else if(arg2.v.reg & R_16) emit2(arg1.v.imm);
+        else if(arg2.v.reg & R_64) emit8(arg1.v.imm);
+        else emit4(arg1.v.imm);
+    }
+    else if(arg1.type == T_IMMEDIATE && arg2.type == T_MEMORY)
+    {
+        if(!iss32(arg1.v.imm))
+        {
+            crap("only 32bit immediates supported");
+        }
+
+        compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+        if(rex) emit1(rex);
+        emit1(0xc7); // mov reg/mem, imm
+        emit1(modrm);
+        if((modrm & 0x07) == MODRM_RM_SIB)
+            emit1(sib);
+
+        // the displacement announced by the mod field precedes the immediate;
+        // this is a no-op for a zero displacement
+        maybe_emit_displacement(&arg2);
+
+        emit4(arg1.v.imm);
+    }
+    else if(arg1.type == T_REGISTER && arg2.type == T_REGISTER) // XXX: same as next
+    {
+        if((arg1.v.reg & R_MSZ) != (arg2.v.reg & R_MSZ))
+            crap("both registers must be same width");
+
+        compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+        if(rex) emit1(rex);
+        emit1(0x89); // mov reg reg/mem,
+        emit1(modrm);
+    }
+    else if(arg1.type == T_REGISTER && arg2.type == T_MEMORY)
+    {
+        compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+        if(arg1.v.reg & R_16)
+            emit1(0x66);
+
+        if(rex) emit1(rex);
+        if(arg1.v.reg & R_8)
+            emit1(0x88); // mov reg reg/mem,
+        else
+            emit1(0x89); // mov reg reg/mem,
+        emit1(modrm);
+        if((modrm & 0x07) == MODRM_RM_SIB)
+            emit1(sib);
+
+        maybe_emit_displacement(&arg2);
+    }
+    else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER)
+    {
+        // operands swapped so that the register fills the ModRM reg field
+        compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+        if(arg2.v.reg & R_16)
+            emit1(0x66);
+
+        if(rex) emit1(rex);
+        if(arg2.v.reg & R_8)
+            emit1(0x8a); // mov reg/mem, reg
+        else
+            emit1(0x8b); // mov reg/mem, reg
+        emit1(modrm);
+        if((modrm & 0x07) == MODRM_RM_SIB)
+            emit1(sib);
+
+        maybe_emit_displacement(&arg1);
+    }
+    else
+        CRAP_INVALID_ARGS;
+}
+
+/*
+ * Shared encoder for two-operand arithmetic/logic instructions whose opcodes
+ * are described by the opparam_t passed in data. Supported forms:
+ *   immediate -> register      (opcode 0x81, subcode in the ModRM reg field)
+ *   register  -> register/mem  (params->rmcode)
+ *   memory    -> register      (params->mrcode, only when it is non-zero)
+ * Any other combination is a fatal error.
+ */
+static void emit_subaddand(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+ u8 rex = 0;
+ u8 modrm = 0;
+ u8 sib = 0;
+
+ opparam_t* params = data;
+
+ if(arg1.type == T_IMMEDIATE && arg2.type == T_REGISTER)
+ {
+ if(!iss32(arg1.v.imm))
+ {
+ crap("only 8 and 32 bit immediates supported");
+ }
+
+ compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+ modrm |= params->subcode << 3;
+
+ if(rex) emit1(rex);
+ /* the short imm8 form (0x83) is compiled out; imm32 is always used */
+#if 0
+ if(isu8(arg1.v.imm))
+ {
+ emit1(0x83); // sub reg/mem, imm8
+ emit1(modrm);
+ emit1(arg1.v.imm&0xFF);
+ }
+ else
+#endif
+ {
+ emit1(0x81); // sub reg/mem, imm32
+ emit1(modrm);
+ emit4(arg1.v.imm);
+ }
+ }
+ else if(arg1.type == T_REGISTER && (arg2.type == T_MEMORY || arg2.type == T_REGISTER))
+ {
+ compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+ if(rex) emit1(rex);
+ emit1(params->rmcode); // sub reg/mem, reg
+ emit1(modrm);
+ /* for a register destination rm == 4 names a register, not a SIB byte */
+ if(arg2.type == T_MEMORY && (modrm & 0x07) == MODRM_RM_SIB)
+ emit1(sib);
+
+ maybe_emit_displacement(&arg2);
+ }
+ else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER && params->mrcode)
+ {
+ /* operands swapped so that the register fills the ModRM reg field */
+ compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+ if(rex) emit1(rex);
+ emit1(params->mrcode); // sub reg, reg/mem
+ emit1(modrm);
+ if((modrm & 0x07) == MODRM_RM_SIB)
+ emit1(sib);
+
+ maybe_emit_displacement(&arg1);
+ }
+ else
+ CRAP_INVALID_ARGS;
+}
+
+/*
+ * Assemble a short conditional jump: one opcode byte followed by a
+ * signed 8 bit displacement.
+ *
+ * mnemonic - instruction name, used for error messages only
+ * arg1     - must be a label
+ * arg2     - must be absent (T_NONE)
+ * data     - the opcode byte, smuggled through the pointer value
+ *            (see the ops[] table)
+ *
+ * The displacement is relative to the byte following the instruction.
+ * The range check is skipped while assembler_pass is 0.
+ */
+static void emit_condjump(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	unsigned off;
+	int disp;
+	unsigned char opcode = (unsigned char)(((unsigned long)data)&0xFF);
+
+	if(arg1.type != T_LABEL || arg2.type != T_NONE)
+		crap("%s: argument must be label", mnemonic);
+
+	emit1(opcode);
+
+	off = lookup_label(arg1.v.label);
+	/* compiledOfs now points at the displacement byte itself */
+	disp = off-(compiledOfs+1);
+	/* rel8 is a signed byte: -128..127 (abs() > 127 wrongly refused -128) */
+	if(assembler_pass && (disp < -128 || disp > 127))
+		crap("cannot jump that far (%x -> %x = %x)", compiledOfs, off, disp);
+
+	emit1(disp);
+}
+
+/*
+ * Assemble an unconditional jump.
+ *
+ * A label operand produces opcode 0xe9 with a 32 bit displacement
+ * measured from the end of the five byte instruction.  A register or
+ * memory operand produces opcode 0xff with 4 in the ModRM reg field.
+ * Register operands must be written with '*' (absolute) and be 64 bit.
+ */
+static void emit_jmp(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex, modrm, sib;
+	const int target_ok = (arg1.type == T_LABEL
+			|| arg1.type == T_REGISTER
+			|| arg1.type == T_MEMORY);
+
+	if(!target_ok || arg2.type != T_NONE)
+		CRAP_INVALID_ARGS;
+
+	if(arg1.type == T_LABEL)
+	{
+		const unsigned target = lookup_label(arg1.v.label);
+		const int rel = target-(compiledOfs+5);
+
+		emit1(0xe9);
+		emit4(rel);
+		return;
+	}
+
+	if(arg1.type == T_REGISTER)
+	{
+		if(!arg1.absolute)
+			crap("jmp must be absolute");
+
+		if((arg1.v.reg & R_64) != R_64)
+			crap("register must be 64bit");
+
+		/* drop the 64 bit flag so that no rex prefix gets requested */
+		arg1.v.reg ^= R_64;
+	}
+
+	/* arg2 is T_NONE here; the jump target goes in as the second operand */
+	compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+	modrm |= 0x4 << 3;
+
+	if(rex)
+		emit1(rex);
+	emit1(0xff);
+	emit1(modrm);
+	if((modrm & 0x07) == MODRM_RM_SIB)
+		emit1(sib);
+	maybe_emit_displacement(&arg1);
+}
+
+/*
+ * Assemble an indirect call through a register: opcode 0xff with 2 in
+ * the ModRM reg field.  The single operand must be a 64 bit register
+ * written with '*' (absolute); no second operand is allowed.
+ */
+static void emit_call(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex, modrm, sib;
+
+	if(arg2.type != T_NONE || arg1.type != T_REGISTER)
+		CRAP_INVALID_ARGS;
+
+	if(!arg1.absolute)
+		crap("call must be absolute");
+
+	if((arg1.v.reg & R_64) != R_64)
+		crap("register must be 64bit");
+
+	/* drop the 64 bit flag so that no rex prefix gets requested */
+	arg1.v.reg ^= R_64;
+
+	/* arg2 is T_NONE here; the call target goes in as the second operand */
+	compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+	modrm |= 0x2 << 3;
+
+	if(rex)
+		emit1(rex);
+	emit1(0xff);
+	emit1(modrm);
+}
+
+
+/*
+ * Assemble an instruction with a two byte opcode (0x0f followed by a
+ * second opcode byte), optionally preceded by the prefix byte given in
+ * params->xmmprefix.  Used by the SSE entries of the ops[] table.
+ *
+ * Accepted operand forms:
+ *   register, register/memory -> second opcode byte params->rmcode
+ *   memory,   register        -> second opcode byte params->mrcode
+ * A form is only accepted when the matching opcode is non-zero in the
+ * parameter table; everything else goes to CRAP_INVALID_ARGS.
+ *
+ * Fixes over the previous version:
+ *  - a zero rmcode (form not supported by the instruction) is rejected
+ *    instead of being emitted as opcode 0x0f 0x00
+ *  - a SIB byte is only emitted for memory operands, matching
+ *    emit_subaddand; register-direct operands never carry one
+ */
+static void emit_twobyte(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex, modrm, sib;
+	u8 opcode;
+	arg_t* lead;	/* operand handed to compute_rexmodrmsib first */
+	arg_t* rm;	/* operand that may be a memory reference */
+
+	opparam_t* params = data;
+
+	if(arg1.type == T_REGISTER
+			&& (arg2.type == T_MEMORY || arg2.type == T_REGISTER)
+			&& params->rmcode)
+	{
+		lead = &arg1;
+		rm = &arg2;
+		opcode = params->rmcode;
+	}
+	else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER && params->mrcode)
+	{
+		lead = &arg2;
+		rm = &arg1;
+		opcode = params->mrcode;
+	}
+	else
+	{
+		CRAP_INVALID_ARGS;
+		return;
+	}
+
+	compute_rexmodrmsib(&rex, &modrm, &sib, lead, rm);
+
+	/* the prefix byte has to come before any rex prefix */
+	if(params->xmmprefix) emit1(params->xmmprefix);
+	if(rex) emit1(rex);
+	emit1(0x0f);
+	emit1(opcode);
+	emit1(modrm);
+	if(rm->type == T_MEMORY && (modrm & 0x07) == MODRM_RM_SIB)
+		emit1(sib);
+
+	maybe_emit_displacement(rm);
+}
+
+/*
+ * Encoding parameters referenced from the ops[] table.
+ *
+ *   subcode   - opcode extension placed in bits 3..5 of the ModRM byte
+ *   rmcode    - opcode for the "register, register/memory" operand form
+ *   mrcode    - opcode for the "memory, register" operand form
+ *   rcode     - opcode used by emit_op_rm / emit_op_rm_cl
+ *   rcode8    - 8 bit operand variant of rcode
+ *   xmmprefix - prefix byte emitted before a two byte opcode
+ *
+ * Fields that are left out are zero, which the emitters treat as
+ * "form not available".
+ *
+ * params_cmp used to carry subcode 6, which is the extension of xor;
+ * "cmp $imm, reg" was therefore assembled as an xor.  cmp is /7.
+ */
+static opparam_t params_add = { .subcode = 0, .rmcode = 0x01, };
+static opparam_t params_or = { .subcode = 1, .rmcode = 0x09, };
+static opparam_t params_and = { .subcode = 4, .rmcode = 0x21, };
+static opparam_t params_sub = { .subcode = 5, .rmcode = 0x29, };
+static opparam_t params_xor = { .subcode = 6, .rmcode = 0x31, };
+static opparam_t params_cmp = { .subcode = 7, .rmcode = 0x39, .mrcode = 0x3b, };
+static opparam_t params_dec = { .subcode = 1, .rcode = 0xff, .rcode8 = 0xfe, };
+static opparam_t params_sar = { .subcode = 7, .rcode = 0xd3, .rcode8 = 0xd2, };
+static opparam_t params_shl = { .subcode = 4, .rcode = 0xd3, .rcode8 = 0xd2, };
+static opparam_t params_shr = { .subcode = 5, .rcode = 0xd3, .rcode8 = 0xd2, };
+static opparam_t params_idiv = { .subcode = 7, .rcode = 0xf7, .rcode8 = 0xf6, };
+static opparam_t params_div = { .subcode = 6, .rcode = 0xf7, .rcode8 = 0xf6, };
+static opparam_t params_imul = { .subcode = 5, .rcode = 0xf7, .rcode8 = 0xf6, };
+static opparam_t params_mul = { .subcode = 4, .rcode = 0xf7, .rcode8 = 0xf6, };
+static opparam_t params_neg = { .subcode = 3, .rcode = 0xf7, .rcode8 = 0xf6, };
+static opparam_t params_not = { .subcode = 2, .rcode = 0xf7, .rcode8 = 0xf6, };
+
+static opparam_t params_cvtsi2ss = { .xmmprefix = 0xf3, .rmcode = 0x2a };
+static opparam_t params_cvttss2si = { .xmmprefix = 0xf3, .rmcode = 0x2c };
+static opparam_t params_addss = { .xmmprefix = 0xf3, .mrcode = 0x58 };
+static opparam_t params_divss = { .xmmprefix = 0xf3, .mrcode = 0x5e };
+static opparam_t params_movss = { .xmmprefix = 0xf3, .mrcode = 0x10, .rmcode = 0x11 };
+static opparam_t params_mulss = { .xmmprefix = 0xf3, .mrcode = 0x59 };
+static opparam_t params_subss = { .xmmprefix = 0xf3, .mrcode = 0x5c };
+static opparam_t params_ucomiss = { .mrcode = 0x2e };
+
+/*
+ * Mnemonic dispatch table: { mnemonic, emitter function, emitter data }.
+ * The data member is either a pointer to one of the params_* tables or
+ * an opcode byte cast to a pointer, depending on the emitter.
+ *
+ * The entries are NOT required to be in order here: assembler_init()
+ * sorts them by mnemonic once (tracked by ops_sorted) so that getop()
+ * can use a binary search.  The terminating all-NULL entry is excluded
+ * from the sort and must stay last.
+ */
+static int ops_sorted = 0;
+static op_t ops[] = {
+ { "addl", emit_subaddand, &params_add },
+ { "addq", emit_subaddand, &params_add },
+ { "addss", emit_twobyte, &params_addss },
+ { "andl", emit_subaddand, &params_and },
+ { "andq", emit_subaddand, &params_and },
+ { "callq", emit_call, NULL },
+ { "cbw", emit_opsingle16, (void*)0x98 },
+ { "cdq", emit_opsingle, (void*)0x99 },
+ { "cmpl", emit_subaddand, &params_cmp },
+ { "cmpq", emit_subaddand, &params_cmp },
+ { "cvtsi2ss", emit_twobyte, &params_cvtsi2ss },
+ { "cvttss2si", emit_twobyte, &params_cvttss2si },
+ { "cwde", emit_opsingle, (void*)0x98 },
+ { "decl", emit_op_rm, &params_dec },
+ { "decq", emit_op_rm, &params_dec },
+ { "divl", emit_op_rm, &params_div },
+ { "divq", emit_op_rm, &params_div },
+ { "divss", emit_twobyte, &params_divss },
+ { "idivl", emit_op_rm, &params_idiv },
+ { "imull", emit_op_rm, &params_imul },
+ { "int3", emit_opsingle, (void*)0xcc },
+ { "ja", emit_condjump, (void*)0x77 },
+ { "jbe", emit_condjump, (void*)0x76 },
+ { "jb", emit_condjump, (void*)0x72 },
+ { "je", emit_condjump, (void*)0x74 },
+ { "jl", emit_condjump, (void*)0x7c },
+ { "jmp", emit_jmp, NULL },
+ { "jmpq", emit_jmp, NULL },
+ { "jnae", emit_condjump, (void*)0x72 },
+ { "jna", emit_condjump, (void*)0x76 },
+ { "jnbe", emit_condjump, (void*)0x77 },
+ { "jnb", emit_condjump, (void*)0x73 },
+ { "jnc", emit_condjump, (void*)0x73 },
+ { "jne", emit_condjump, (void*)0x75 },
+ { "jnge", emit_condjump, (void*)0x7c },
+ { "jng", emit_condjump, (void*)0x7e },
+ { "jnle", emit_condjump, (void*)0x7f },
+ { "jnl", emit_condjump, (void*)0x7d },
+ { "jnz", emit_condjump, (void*)0x75 },
+ { "jp", emit_condjump, (void*)0x7a },
+ { "jz", emit_condjump, (void*)0x74 },
+ { "movb", emit_mov, NULL },
+ { "movl", emit_mov, NULL },
+ { "movq", emit_mov, NULL },
+ { "movss", emit_twobyte, &params_movss },
+ { "movw", emit_mov, NULL },
+ { "mull", emit_op_rm, &params_mul },
+ { "mulss", emit_twobyte, &params_mulss },
+ { "negl", emit_op_rm, &params_neg },
+ { "negq", emit_op_rm, &params_neg },
+ { "nop", emit_opsingle, (void*)0x90 },
+ { "notl", emit_op_rm, &params_not },
+ { "notq", emit_op_rm, &params_not },
+ { "or", emit_subaddand, &params_or },
+ { "orl", emit_subaddand, &params_or },
+ { "pop", emit_opreg, (void*)0x58 },
+ { "push", emit_opreg, (void*)0x50 },
+ { "ret", emit_opsingle, (void*)0xc3 },
+ { "sarl", emit_op_rm_cl, &params_sar },
+ { "shl", emit_op_rm_cl, &params_shl },
+ { "shrl", emit_op_rm_cl, &params_shr },
+ { "subl", emit_subaddand, &params_sub },
+ { "subq", emit_subaddand, &params_sub },
+ { "subss", emit_twobyte, &params_subss },
+ { "ucomiss", emit_twobyte, &params_ucomiss },
+ { "xorl", emit_subaddand, &params_xor },
+ { "xorq", emit_subaddand, &params_xor },
+ { NULL, NULL, NULL }
+};
+
+/* qsort() comparison callback: orders two op_t entries by mnemonic. */
+static int opsort(const void* A, const void* B)
+{
+	const char* lhs = ((const op_t*)A)->mnemonic;
+	const char* rhs = ((const op_t*)B)->mnemonic;
+
+	return strcmp(lhs, rhs);
+}
+
+/*
+ * Look up the ops[] entry for mnemonic n.
+ *
+ * Returns a pointer to the matching entry, or NULL if the mnemonic is
+ * unknown.  Relies on ops[] having been sorted by assembler_init().
+ *
+ * The search runs over the half-open index range [lo, hi).  The
+ * previous inclusive version started with the upper bound on the NULL
+ * sentinel entry (strcmp() on a NULL mnemonic for names sorting after
+ * the last entry) and let the unsigned upper bound wrap around below
+ * zero for names sorting before the first entry.
+ */
+static op_t* getop(const char* n)
+{
+	size_t lo = 0;
+	/* number of real entries; the trailing NULL sentinel is excluded */
+	size_t hi = sizeof(ops)/sizeof(ops[0]) - 1;
+
+	while(lo < hi)
+	{
+		size_t mid = lo + ((hi - lo) >> 1);
+		int r = strcmp(ops[mid].mnemonic, n);
+
+		if(r == 0)
+			return &ops[mid];
+
+		if(r < 0)
+			lo = mid + 1;
+		else
+			hi = mid;
+	}
+
+	return NULL;
+}
+
+/*
+ * Translate a register name (without the leading '%') into its reg_t
+ * value.  Unknown names are reported through crap(); 0 is returned in
+ * that case.
+ *
+ * Recognised: al, ax, cl, xmm0, r[abcd]x, r[sd]i, rsp, r8, r9, r10,
+ * r15, e[abcd]x and e[sd]i.  Only al, ax, cl, xmm0, rsp, r8 and r9 are
+ * matched as complete strings; for the other names just the leading
+ * characters are inspected.
+ */
+static reg_t parsereg(const char* str)
+{
+	const char first = str[0];
+
+	if(first == 'a' || first == 'c')
+	{
+		/* 8 and 16 bit registers, exact two letter names */
+		if(str[1] == 'l' && !str[2])
+			return first == 'a' ? R_AL : R_CL;
+		if(first == 'a' && str[1] == 'x' && !str[2])
+			return R_AX;
+	}
+	else if(first == 'x')
+	{
+		if(strcmp(str, "xmm0") == 0)
+			return R_XMM0;
+	}
+	else if((first == 'r' || first == 'e') && str[1])
+	{
+		/* 'r' selects the 64 bit name, 'e' the 32 bit one */
+		const int is64 = (first == 'r');
+		const char second = str[1];
+		const char third = str[2];
+
+		if(third == 'x')
+		{
+			switch(second)
+			{
+				case 'a': return is64 ? R_RAX : R_EAX;
+				case 'b': return is64 ? R_RBX : R_EBX;
+				case 'c': return is64 ? R_RCX : R_ECX;
+				case 'd': return is64 ? R_RDX : R_EDX;
+			}
+		}
+		else if(third == 'i')
+		{
+			switch(second)
+			{
+				case 's': return is64 ? R_RSI : R_ESI;
+				case 'd': return is64 ? R_RDI : R_EDI;
+			}
+		}
+		else if(is64)
+		{
+			/* names that only exist in the 64 bit set */
+			if(second == 's' && third == 'p' && !str[3])
+				return R_RSP;
+			if(second == '8' && !third)
+				return R_R8;
+			if(second == '9' && !third)
+				return R_R9;
+			if(second == '1' && third == '0')
+				return R_R10;
+			if(second == '1' && third == '5')
+				return R_R15;
+		}
+	}
+
+	crap("invalid register %s", str);
+
+	return 0;
+}
+
+/*
+ * Token codes returned by nexttok() for everything that is not a single
+ * punctuation character; punctuation is returned as the character
+ * itself.  The codes start at 0x80, above the values of the punctuation
+ * characters nexttok() recognises.
+ */
+typedef enum {
+ TOK_LABEL = 0x80,
+ TOK_INT = 0x81,
+ TOK_END = 0x82,
+ TOK_INVALID = 0x83,
+} token_t;
+
+/*
+ * Read the next token from *str and advance *str past it.
+ *
+ * str   - in/out cursor into the line being parsed
+ * label - if not NULL, receives the text of a TOK_LABEL token; it is
+ *         set to the empty string for every other token.  The buffer
+ *         must hold at least LABELLEN bytes.
+ * val   - if not NULL, receives the value of a TOK_INT token; it is set
+ *         to 0 for every other token
+ *
+ * Returns one of the punctuation characters $ * % - ) ( , as itself,
+ * TOK_LABEL for a word starting with a lower case letter, TOK_INT for
+ * a number (any base understood by strtoull with base 0), TOK_END at
+ * the end of the string, or TOK_INVALID after reporting unparsable
+ * input through crap().  *str is left untouched for TOK_END and
+ * TOK_INVALID.
+ *
+ * Integers are converted with strtoull so that the full u64 range is
+ * available; strtol saturated at LONG_MAX.
+ */
+static unsigned char nexttok(const char** str, char* label, u64* val)
+{
+	const char* s = *str;
+
+	if(label) *label = 0;
+	if(val) *val = 0;
+
+	while(*s == ' ') ++s;
+
+	if(!*s)
+	{
+		return TOK_END;
+	}
+	else if(*s == '$' || *s == '*' || *s == '%' || *s == '-' || *s == ')' || *s == '(' || *s == ',')
+	{
+		*str = s+1;
+		return *s;
+	}
+	else if(*s >= 'a' && *s <= 'z')
+	{
+		/* a counts the characters following the first one */
+		size_t a = strspn(s+1, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_");
+		if(a+1 >= LABELLEN)
+			crap("label %s too long", s);
+		if(label)
+		{
+			memcpy(label, s, a+1);
+			label[a+1] = 0;
+		}
+		*str = s+a+1;
+		return TOK_LABEL;
+	}
+	else if(*s >= '0' && *s <= '9')
+	{
+		char* endptr = NULL;
+		u64 v = strtoull(s, &endptr, 0);
+		if(endptr && (endptr-s == 0))
+			crap("invalid integer %s", s);
+		if(val) *val = v;
+		*str = endptr;
+		return TOK_INT;
+	}
+	crap("can't parse '%s'", *str);
+	return TOK_INVALID;
+}
+
+/*
+ * Parse one instruction operand starting at *str and advance *str past
+ * it.
+ *
+ * Understood forms:
+ *   $N, $-N                  immediate
+ *   %reg                     register
+ *   *%reg                    register, flagged absolute
+ *   name                     label
+ *   (base), N(base), -N(base), *(base)
+ *   N(base,%index,scale)     memory reference; base is a register or
+ *                            an integer, scale is 1, 2, 4 or 8
+ *
+ * Returns the parsed operand.  Syntax errors are reported via crap().
+ *
+ * The label scratch buffer is LABELLEN bytes because that is the bound
+ * nexttok() enforces when it fills it; the former 20 byte buffer was
+ * only safe as long as LABELLEN did not exceed 20.  basetype now starts
+ * out as T_NONE instead of being left uninitialised when the base is
+ * neither a register nor an integer.
+ */
+static arg_t parsearg(const char** str)
+{
+	arg_t arg;
+	const char* s = *str;
+	char label[LABELLEN];
+	u64 val;
+	int negative = 1;
+	unsigned ttype;
+
+	arg.type = T_NONE;
+	arg.absolute = 0;
+
+	while(*s == ' ') ++s;
+
+	switch(nexttok(&s, label, &val))
+	{
+		case '$' :
+			ttype = nexttok(&s, NULL, &val);
+			if(ttype == '-')
+			{
+				negative = -1;
+				ttype = nexttok(&s, NULL, &val);
+			}
+			if(ttype != TOK_INT)
+				crap("expected integer");
+			arg.type = T_IMMEDIATE;
+			arg.v.imm = negative * val;
+			break;
+		case '*' :
+			if((ttype = nexttok(&s, NULL, NULL)) != '%')
+			{
+				if(ttype == '(')
+					goto tok_memory;
+				crap("expected '%%'");
+			}
+			arg.absolute = 1;
+			/* fall through */
+		case '%' :
+			if(nexttok(&s, label, &val) != TOK_LABEL)
+				crap("expected label");
+			arg.type = T_REGISTER;
+			arg.v.reg = parsereg(label);
+			break;
+		case TOK_LABEL:
+			arg.type = T_LABEL;
+			strncpy(arg.v.label, label, LABELLEN);
+			break;
+		case '-':
+			negative = -1;
+			if(nexttok(&s, NULL, &val) != TOK_INT)
+				crap("expected integer");
+			/* fall through */
+		case TOK_INT:
+			if(nexttok(&s, label, NULL) != '(')
+				crap("expected '('"); // mov to/from fixed address not supported
+			/* fall through */
+		case '(':
+tok_memory:
+			arg.type = T_MEMORY;
+			arg.v.mem.basetype = T_NONE;
+			arg.v.mem.indextype = T_NONE;
+			/* val is 0 here when no displacement was written */
+			arg.v.mem.disp = negative * val;
+			ttype = nexttok(&s, label, &val);
+			if(ttype == '%' && nexttok(&s, label, &val) != TOK_LABEL)
+			{
+				crap("expected register");
+			}
+			if (ttype == '%')
+			{
+				arg.v.mem.basetype = T_REGISTER;
+				arg.v.mem.base.reg = parsereg(label);
+			}
+			else if (ttype == TOK_INT)
+			{
+				arg.v.mem.basetype = T_IMMEDIATE;
+				arg.v.mem.base.imm = val;
+			}
+			if((ttype = nexttok(&s, NULL, NULL)) == ',')
+			{
+				ttype = nexttok(&s, label, &val);
+				if(ttype == '%' && nexttok(&s, label, &val) != TOK_LABEL)
+				{
+					crap("expected register");
+				}
+				if (ttype == '%')
+				{
+					arg.v.mem.indextype = T_REGISTER;
+					arg.v.mem.index.reg = parsereg(label);
+				}
+				else if (ttype == TOK_INT)
+				{
+					crap("index must be register");
+					arg.v.mem.indextype = T_IMMEDIATE;
+					arg.v.mem.index.imm = val;
+				}
+				if(nexttok(&s, NULL, NULL) != ',')
+					crap("expected ','");
+				if(nexttok(&s, NULL, &val) != TOK_INT)
+					crap("expected integer");
+				if(val != 1 && val != 2 && val != 4 && val != 8)
+					crap("scale must 1, 2, 4 or 8");
+				arg.v.mem.scale = val;
+
+				ttype = nexttok(&s, NULL, NULL);
+			}
+			if(ttype != ')')
+			{
+				crap("expected ')' or ','");
+			}
+			break;
+		default:
+			crap("invalid token %hhu in %s", *(unsigned char*)s, *str);
+			break;
+	}
+
+	*str = s;
+
+	return arg;
+}
+
+/* ************************* */
+
+/*
+ * Prepare the assembler for a pass over the input.
+ *
+ * pass - 0 for the first pass, non-zero for later passes.  The output
+ *        offset is reset on every call; the label table and the
+ *        current-line pointer are only cleared for pass 0.
+ *
+ * The first call ever also sorts ops[] (without its NULL terminator)
+ * by mnemonic, which getop() depends on.
+ */
+void assembler_init(int pass)
+{
+	const size_t op_count = sizeof(ops)/sizeof(ops[0]) - 1;
+
+	assembler_pass = pass;
+	compiledOfs = 0;
+
+	if(pass == 0)
+	{
+		labelhash_free();
+		cur_line = NULL;
+	}
+
+	if(ops_sorted)
+		return;
+
+	qsort(ops, op_count, sizeof(ops[0]), opsort);
+	ops_sorted = 1;
+}
+
+/*
+ * Return the current output offset (compiledOfs).  assembler_init()
+ * resets it to 0 at the start of each pass.
+ */
+size_t assembler_get_code_size(void)
+{
+ return compiledOfs;
+}
+
+/*
+ * Set the output buffer pointer used by the assembler.  The buffer is
+ * not copied or sized here.
+ * NOTE(review): presumably the emit helpers store bytes through this
+ * pointer and the caller owns the memory - confirm against emit1().
+ */
+void assembler_set_output(char* buf)
+{
+ out = buf;
+}
+
+/*
+ * Assemble a single line of input.
+ *
+ * input - the line; it is also remembered in cur_line and printed with
+ *         %s in debug output, so it has to be NUL terminated
+ * len   - number of characters of input to process, an optional
+ *         trailing newline included
+ *
+ * A line ending in ':' defines a label: in pass 0 it is recorded at the
+ * current output offset, in later passes it is only logged.  Any other
+ * line is "mnemonic [arg1[,arg2]]" and is dispatched to the emitter
+ * registered for the mnemonic in ops[].  Empty lines are ignored.
+ *
+ * Fixes over the previous version:
+ *  - only len bytes are copied from input (it used to read
+ *    sizeof(line) bytes regardless of how long input really was)
+ *  - a line consisting only of "\n" no longer reads line[-1]
+ *  - nothing is dereferenced or overrun should crap() return
+ *  - the always-false "no operator" check on opn is gone
+ */
+void assemble_line(const char* input, size_t len)
+{
+	char line[4096];
+	char* s;
+	const char* p;
+	op_t* o;
+	arg_t arg1, arg2;
+
+	arg1.type = T_NONE;
+	arg2.type = T_NONE;
+
+	if(len < 1)
+		return;
+
+	if(len >= sizeof(line))
+	{
+		crap("line too long");
+		return;
+	}
+
+	memcpy(line, input, len);
+	line[len] = 0;
+	cur_line = input;
+
+	if(line[len-1] == '\n') line[--len] = 0;
+	if(!len)
+		return;
+
+	if(line[len-1] == ':')
+	{
+		line[--len] = 0;
+		if(assembler_pass)
+			debug("%s: 0x%x\n", line, compiledOfs);
+		else
+			hash_add_label(line, compiledOfs);
+		return;
+	}
+
+	/* split the mnemonic off at the first space, operands follow */
+	s = strchr(line, ' ');
+	if(s)
+	{
+		*s++ = 0;
+		p = s;
+		arg1 = parsearg(&p);
+		if(*p)
+		{
+			if(*p != ',')
+				crap("expected ',', got '%c'", *p);
+			++p;
+			arg2 = parsearg(&p);
+		}
+	}
+
+	o = getop(line);
+	if(!o)
+	{
+		crap("cannot handle op %s", line);
+		return;
+	}
+	o->func(line, arg1, arg2, o->data);
+	if(assembler_pass)
+		debug(" - %s%s", cur_line, cur_line[strlen(cur_line)-1]=='\n'?"":"\n");
+}
+
+#ifdef SA_STANDALONE
+/*
+ * Stand-alone driver: assembler <input> [<output>]
+ *
+ * Runs two passes over the input file.  Pass 0 runs without an output
+ * buffer being installed and yields the code size and the label table;
+ * before pass 1 a buffer of that size is allocated and installed with
+ * assembler_set_output().  If an output name is given it is opened for
+ * writing as fout.
+ *
+ * Compared to the previous version the output buffer is released and
+ * fout is closed before returning, and an input that produces no code
+ * no longer depends on malloc(0) returning a non-NULL pointer.
+ */
+int main(int argc, char* argv[])
+{
+	char line[4096];
+	size_t len;
+	int pass;
+	FILE* file = NULL;
+	char* b = NULL;
+
+	if(argc < 2)
+	{
+		crap("specify file");
+	}
+
+	file = fopen(argv[1], "r");
+	if(!file)
+	{
+		crap("can't open file");
+	}
+
+	if(argc > 2)
+	{
+		fout = fopen(argv[2], "w");
+		if(!fout)
+		{
+			crap("can't open %s for writing", argv[2]);
+		}
+	}
+
+	for(pass = 0; pass < 2; ++pass)
+	{
+		if(fseek(file, 0, SEEK_SET))
+			crap("can't rewind file");
+
+		if(pass)
+		{
+			/* size determined by pass 0; never request zero bytes */
+			size_t size = assembler_get_code_size();
+
+			b = malloc(size ? size : 1);
+			if(!b)
+				crap("cannot allocate memory");
+			assembler_set_output(b);
+		}
+
+		assembler_init(pass);
+
+		while(fgets(line, sizeof(line), file))
+		{
+			len = strlen(line);
+			if(!len) continue;
+
+			assemble_line(line, len);
+		}
+	}
+
+	assembler_init(0);
+
+	/* do not leave the assembler pointing at freed memory */
+	assembler_set_output(NULL);
+	free(b);
+
+	fclose(file);
+
+	/* only close the stream this function opened itself */
+	if(argc > 2 && fout && fclose(fout))
+		crap("can't close %s", argv[2]);
+
+	return 0;
+}
+#endif