20 files changed, 4989 insertions, 182 deletions
diff --git a/Makefile b/Makefile
index 2f3f339c..37ec7fe1 100644
--- a/Makefile
+++ b/Makefile
@@ -924,6 +924,7 @@ Q3OBJ = \
   $(B)/client/q_shared.o \
   \
   $(B)/client/unzip.o \
+  $(B)/client/puff.o \
   $(B)/client/vm.o \
   $(B)/client/vm_interpreted.o \
   \
@@ -999,7 +1000,7 @@ ifeq ($(HAVE_VM_COMPILED),true)
     Q3OBJ += $(B)/client/vm_x86.o
   endif
   ifeq ($(ARCH),x86_64)
-    Q3OBJ += $(B)/client/vm_x86_64.o
+    Q3OBJ += $(B)/client/vm_x86_64.o $(B)/client/vm_x86_64_assembler.o
   endif
   ifeq ($(ARCH),ppc)
     Q3OBJ += $(B)/client/vm_ppc.o
@@ -1125,7 +1126,7 @@ ifeq ($(HAVE_VM_COMPILED),true)
     Q3DOBJ += $(B)/ded/vm_x86.o
   endif
   ifeq ($(ARCH),x86_64)
-    Q3DOBJ += $(B)/ded/vm_x86_64.o
+    Q3DOBJ += $(B)/ded/vm_x86_64.o $(B)/ded/vm_x86_64_assembler.o
   endif
   ifeq ($(ARCH),ppc)
     Q3DOBJ += $(B)/ded/vm_ppc.o
diff --git a/src/client/cl_keys.c b/src/client/cl_keys.c
index b283ffbc..950cb5b1 100644
--- a/src/client/cl_keys.c
+++ b/src/client/cl_keys.c
@@ -307,7 +307,7 @@ EDIT FIELDS
 Field_Draw
 
 Handles horizontal scrolling and cursor blinking
-x, y, amd width are in pixels
+x, y, and width are in pixels
 ===================
 */
 void Field_VariableSizeDraw( field_t *edit, int x, int y, int width, int size, qboolean showCursor ) {
@@ -318,8 +318,8 @@ void Field_VariableSizeDraw( field_t *edit, int x, int y, int width, int size, q
 	char	str[MAX_STRING_CHARS];
 	int		i;
 
-	drawLen = edit->widthInChars;
-	len = strlen( edit->buffer ) + 1;
+	drawLen = edit->widthInChars - 1; // - 1 so there is always a space for the cursor
+	len = strlen( edit->buffer );
 
 	// guarantee that cursor will be visible
 	if ( len <= drawLen ) {
@@ -332,14 +332,6 @@ void Field_VariableSizeDraw( field_t *edit, int x, int y, int width, int size, q
 			}
 		}
 		prestep = edit->scroll;
-
-/*
-		if ( edit->cursor < len - drawLen ) {
-			prestep = edit->cursor;	// cursor at start
-		} else {
-			prestep = len - drawLen;
-		}
-*/
 	}
 
 	if ( prestep + drawLen > len ) {
@@ -380,7 +372,7 @@ void Field_VariableSizeDraw( field_t *edit, int x, int y, int width, int size, q
 		cursorChar = 10;
 	}
 
-	i = drawLen - ( Q_PrintStrlen( str ) + 1 );
+	i = drawLen - Q_PrintStrlen( str );
 
 	if ( size == SMALLCHAR_WIDTH ) {
 		SCR_DrawSmallChar( x + ( edit->cursor - prestep - i ) * size, y, cursorChar );
@@ -445,54 +437,50 @@ void Field_KeyDownEvent( field_t *edit, int key ) {
 		return;
 	}
 
+	key = tolower( key );
 	len = strlen( edit->buffer );
 
-	if ( key == K_DEL ) {
-		if ( edit->cursor < len ) {
-			memmove( edit->buffer + edit->cursor, 
-				edit->buffer + edit->cursor + 1, len - edit->cursor );
-		}
-		return;
-	}
+	switch ( key ) {
+		case K_DEL:
+			if ( edit->cursor < len ) {
+				memmove( edit->buffer + edit->cursor, 
+					edit->buffer + edit->cursor + 1, len - edit->cursor );
+			}
+			break;
 
-	if ( key == K_RIGHTARROW ) 
-	{
-		if ( edit->cursor < len ) {
-			edit->cursor++;
-		}
+		case K_RIGHTARROW:
+			if ( edit->cursor < len ) {
+				edit->cursor++;
+			}
+			break;
 
-		if ( edit->cursor >= edit->scroll + edit->widthInChars && edit->cursor <= len )
-		{
-			edit->scroll++;
-		}
-		return;
-	}
+		case K_LEFTARROW:
+			if ( edit->cursor > 0 ) {
+				edit->cursor--;
+			}
+			break;
 
-	if ( key == K_LEFTARROW ) 
-	{
-		if ( edit->cursor > 0 ) {
-			edit->cursor--;
-		}
-		if ( edit->cursor < edit->scroll )
-		{
-			edit->scroll--;
-		}
-		return;
-	}
+		case K_HOME:
+			edit->cursor = 0;
+			break;
 
-	if ( key == K_HOME || ( tolower(key) == 'a' && keys[K_CTRL].down ) ) {
-		edit->cursor = 0;
-		return;
-	}
+		case K_END:
+			edit->cursor = len;
+			break;
 
-	if ( key == K_END || ( tolower(key) == 'e' && keys[K_CTRL].down ) ) {
-		edit->cursor = len;
-		return;
+		case K_INS:
+			key_overstrikeMode = !key_overstrikeMode;
+			break;
+
+		default:
+			break;
 	}
 
-	if ( key == K_INS ) {
-		key_overstrikeMode = !key_overstrikeMode;
-		return;
+	// Change scroll if cursor is no longer visible
+	if ( edit->cursor < edit->scroll ) {
+		edit->scroll = edit->cursor;
+	} else if ( edit->cursor >= edit->scroll + edit->widthInChars && edit->cursor <= len ) {
+		edit->scroll = edit->cursor - edit->widthInChars + 1;
 	}
 }
 
diff --git a/src/null/null_client.c b/src/null/null_client.c
index 994f53f4..2b98195c 100644
--- a/src/null/null_client.c
+++ b/src/null/null_client.c
@@ -86,4 +86,4 @@ void CL_StartHunkUsers( void ) {
 }
 
 // bk001119 - added new dummy for sv_init.c
-void CL_ShutdownAll(void) {};
+void CL_ShutdownAll(void) {}	/* intentionally empty stub */
diff --git a/src/qcommon/cm_patch.c b/src/qcommon/cm_patch.c
index f262db9c..38b7d5cc 100644
--- a/src/qcommon/cm_patch.c
+++ b/src/qcommon/cm_patch.c
@@ -1154,7 +1154,7 @@ struct patchCollide_s	*CM_GeneratePatchCollide( int width, int height, vec3_t *p
 
 	if ( width <= 2 || height <= 2 || !points ) {
 		Com_Error( ERR_DROP, "CM_GeneratePatchFacets: bad parameters: (%i, %i, %p)",
-			width, height, points );
+			width, height, (void *)points );
 	}
 
 	if ( !(width & 1) || !(height & 1) ) {
@@ -1387,6 +1387,11 @@ void CM_TraceThroughPatchCollide( traceWork_t *tw, const struct patchCollide_s *
 	static cvar_t *cv;
 #endif //BSPC
 
+	if ( !BoundsIntersect( tw->bounds[0], tw->bounds[1],
+				pc->bounds[0], pc->bounds[1] ) ) {
+		return;
+	}
+
 	if (tw->isPoint) {
 		CM_TracePointThroughPatchCollide( tw, pc );
 		return;
diff --git a/src/qcommon/cm_test.c b/src/qcommon/cm_test.c
index 9e950603..485facc2 100644
--- a/src/qcommon/cm_test.c
+++ b/src/qcommon/cm_test.c
@@ -251,6 +251,10 @@ int CM_PointContents( const vec3_t p, clipHandle_t model ) {
 		brushnum = cm.leafbrushes[leaf->firstLeafBrush+k];
 		b = &cm.brushes[brushnum];
 
+		if ( !BoundsIntersectPoint( b->bounds[0], b->bounds[1], p ) ) {
+			continue;
+		}
+
 		// see if the point is in the brush
 		for ( i = 0 ; i < b->numsides ; i++ ) {
 			d = DotProduct( p, b->sides[i].plane->normal );
diff --git a/src/qcommon/cm_trace.c b/src/qcommon/cm_trace.c
index ee9540e3..c40e1013 100644
--- a/src/qcommon/cm_trace.c
+++ b/src/qcommon/cm_trace.c
@@ -847,6 +847,11 @@ void CM_TraceThroughLeaf( traceWork_t *tw, cLeaf_t *leaf ) {
 
 		b->collided = qfalse;
 
+		if ( !BoundsIntersect( tw->bounds[0], tw->bounds[1],
+					b->bounds[0], b->bounds[1] ) ) {
+			continue;
+		}
+
 		CM_TraceThroughBrush( tw, b );
 		if ( !tw->trace.fraction ) {
 			tw->trace.lateralFraction = 0.0f;
diff --git a/src/qcommon/common.c b/src/qcommon/common.c
index cdfe3386..be4eafd1 100644
--- a/src/qcommon/common.c
+++ b/src/qcommon/common.c
@@ -1257,7 +1257,7 @@ void Com_Meminfo_f( void ) {
 	for (block = mainzone->blocklist.next ; ; block = block->next) {
 		if ( Cmd_Argc() != 1 ) {
 			Com_Printf ("block:%p    size:%7i    tag:%3i\n",
-				block, block->size, block->tag);
+				(void *)block, block->size, block->tag);
 		}
 		if ( block->tag ) {
 			zoneBytes += block->size;
diff --git a/src/qcommon/files.c b/src/qcommon/files.c
index cd369908..c1f6fb2d 100644
--- a/src/qcommon/files.c
+++ b/src/qcommon/files.c
@@ -975,22 +975,6 @@ qboolean FS_FilenameCompare( const char *s1, const char *s2 ) {
 
 /*
 ===========
-FS_ShiftedStrStr
-===========
-*/
-char *FS_ShiftedStrStr(const char *string, const char *substring, int shift) {
-	char buf[MAX_STRING_TOKENS];
-	int i;
-
-	for (i = 0; substring[i]; i++) {
-		buf[i] = substring[i] + shift;
-	}
-	buf[i] = '\0';
-	return strstr(string, buf);
-}
-
-/*
-===========
 FS_FOpenFileRead
 
 Finds the file in the search path.
@@ -1117,19 +1101,13 @@ int FS_FOpenFileRead( const char *filename, fileHandle_t *file, qboolean uniqueF
 						}
 					}
 
-					// game.qvm	- 13
-					// ZT`X!di`
-					if (!(pak->referenced & FS_QAGAME_REF) && FS_ShiftedStrStr(filename, "ZT`X!di`", 13)) {
+					if (!(pak->referenced & FS_QAGAME_REF) && strstr(filename, "game.qvm")) {
 						pak->referenced |= FS_QAGAME_REF;
 					}
-					// cgame.qvm	- 7
-					// \`Zf^'jof
-					if (!(pak->referenced & FS_CGAME_REF) && FS_ShiftedStrStr(filename , "\\`Zf^'jof", 7)) {
+					if (!(pak->referenced & FS_CGAME_REF) && strstr(filename, "cgame.qvm")) {
 						pak->referenced |= FS_CGAME_REF;
 					}
-					// ui.qvm		- 5
-					// pd)lqh
-					if (!(pak->referenced & FS_UI_REF) && FS_ShiftedStrStr(filename , "pd)lqh", 5)) {
+					if (!(pak->referenced & FS_UI_REF) && strstr(filename, "ui.qvm")) {
 						pak->referenced |= FS_UI_REF;
 					}
 
diff --git a/src/qcommon/msg.c b/src/qcommon/msg.c
index 46019809..d46c2a3b 100644
--- a/src/qcommon/msg.c
+++ b/src/qcommon/msg.c
@@ -791,7 +791,7 @@ typedef struct {
 } netField_t;
 
 // using the stringizing operator to save typing...
-#define	NETF(x) #x,(int)&((entityState_t*)0)->x
+#define	NETF(x) #x,(size_t)&((entityState_t*)0)->x
 
 netField_t	entityStateFields[] = 
 {
@@ -1106,7 +1106,7 @@ plyer_state_t communication
 */
 
 // using the stringizing operator to save typing...
-#define	PSF(x) #x,(int)&((playerState_t*)0)->x
+#define	PSF(x) #x,(size_t)&((playerState_t*)0)->x
 
 netField_t	playerStateFields[] = 
 {
diff --git a/src/qcommon/puff.c b/src/qcommon/puff.c
new file mode 100644
index 00000000..721854d8
--- /dev/null
+++ b/src/qcommon/puff.c
@@ -0,0 +1,758 @@
+/*
+ *  This is a modified version of Mark Adler's work,
+ *  see below for the original copyright.
+ *  2006 - Joerg Dietrich <dietrich_joerg@gmx.de>
+ */
+
+/*
+ * puff.c
+ * Copyright (C) 2002-2004 Mark Adler
+ * For conditions of distribution and use, see copyright notice in puff.h
+ * version 1.8, 9 Jan 2004
+ *
+ * puff.c is a simple inflate written to be an unambiguous way to specify the
+ * deflate format.  It is not written for speed but rather simplicity.  As a
+ * side benefit, this code might actually be useful when small code is more
+ * important than speed, such as bootstrap applications.  For typical deflate
+ * data, zlib's inflate() is about four times as fast as puff().  zlib's
+ * inflate compiles to around 20K on my machine, whereas puff.c compiles to
+ * around 4K on my machine (a PowerPC using GNU cc).  If the faster decode()
+ * function here is used, then puff() is only twice as slow as zlib's
+ * inflate().
+ *
+ * All dynamically allocated memory comes from the stack.  The stack required
+ * is less than 2K bytes.  This code is compatible with 16-bit int's and
+ * assumes that long's are at least 32 bits.  puff.c uses the short data type,
+ * assumed to be 16 bits, for arrays in order to conserve memory.  The code
+ * works whether integers are stored big endian or little endian.
+ *
+ * In the comments below are "Format notes" that describe the inflate process
+ * and document some of the less obvious aspects of the format.  This source
+ * code is meant to supplement RFC 1951, which formally describes the deflate
+ * format:
+ *
+ *    http://www.zlib.org/rfc-deflate.html
+ */
+
+/*
+ * Change history:
+ *
+ * 1.0  10 Feb 2002     - First version
+ * 1.1  17 Feb 2002     - Clarifications of some comments and notes
+ *                      - Update puff() dest and source pointers on negative
+ *                        errors to facilitate debugging deflators
+ *                      - Remove longest from struct huffman -- not needed
+ *                      - Simplify offs[] index in construct()
+ *                      - Add input size and checking, using longjmp() to
+ *                        maintain easy readability
+ *                      - Use short data type for large arrays
+ *                      - Use pointers instead of long to specify source and
+ *                        destination sizes to avoid arbitrary 4 GB limits
+ * 1.2  17 Mar 2002     - Add faster version of decode(), doubles speed (!),
+ *                        but leave simple version for readability
+ *                      - Make sure invalid distances detected if pointers
+ *                        are 16 bits
+ *                      - Fix fixed codes table error
+ *                      - Provide a scanning mode for determining size of
+ *                        uncompressed data
+ * 1.3  20 Mar 2002     - Go back to lengths for puff() parameters [Jean-loup]
+ *                      - Add a puff.h file for the interface
+ *                      - Add braces in puff() for else do [Jean-loup]
+ *                      - Use indexes instead of pointers for readability
+ * 1.4  31 Mar 2002     - Simplify construct() code set check
+ *                      - Fix some comments
+ *                      - Add FIXLCODES #define
+ * 1.5   6 Apr 2002     - Minor comment fixes
+ * 1.6   7 Aug 2002     - Minor format changes
+ * 1.7   3 Mar 2003     - Added test code for distribution
+ *                      - Added zlib-like license
+ * 1.8   9 Jan 2004     - Added some comments on no distance codes case
+ */
+
+#include <setjmp.h>             /* for setjmp(), longjmp(), and jmp_buf */
+#include "puff.h"		/* prototype for puff() */
+
+#define local static            /* for local function definitions */
+
+/*
+ * Maximums for allocations and loops.  It is not useful to change these --
+ * they are fixed by the deflate format.
+ */
+#define MAXBITS 15              /* maximum bits in a code */
+#define MAXLCODES 286           /* maximum number of literal/length codes */
+#define MAXDCODES 30            /* maximum number of distance codes */
+#define MAXCODES (MAXLCODES+MAXDCODES)  /* maximum codes lengths to read */
+#define FIXLCODES 288           /* number of fixed literal/length codes */
+
+/* input and output state shared by all of the decoding routines */
+struct state {
+    /* output state */
+    uint8_t *out;         /* output buffer, or NULL to only count output bytes */
+    uint32_t outlen;       /* available space at out */
+    uint32_t outcnt;       /* bytes written to out (or counted) so far */
+
+    /* input state */
+    uint8_t *in;          /* input buffer */
+    uint32_t inlen;        /* available input at in */
+    uint32_t incnt;        /* bytes read so far */
+    int32_t bitbuf;                 /* bit buffer: bits read from in but not yet used */
+    int32_t bitcnt;                 /* number of bits in bit buffer */
+
+    /* longjmp() target used by bits() and decode() when input runs out */
+    jmp_buf env;
+};
+
+/*
+ * Return need bits from the input stream.  This always leaves less than
+ * eight bits in the buffer.  bits() works properly for need == 0.
+ *
+ * Format notes:
+ *
+ * - Bits are stored in bytes from the least significant bit to the most
+ *   significant bit.  Therefore bits are dropped from the bottom of the bit
+ *   buffer, using shift right, and new bytes are appended to the top of the
+ *   bit buffer, using shift left.
+ */
+local int32_t bits(struct state *s, int32_t need)
+{
+    int32_t val;           /* bit accumulator (can use up to 20 bits) */
+
+    /* load at least need bits into val */
+    val = s->bitbuf;       /* start from the bits that are already buffered */
+    while (s->bitcnt < need) {
+        if (s->incnt == s->inlen) longjmp(s->env, 1);   /* out of input: unwind via s->env */
+        val |= (int32_t)(s->in[s->incnt++]) << s->bitcnt;  /* load eight bits above those held */
+        s->bitcnt += 8;
+    }
+
+    /* drop need bits and update buffer, always zero to seven bits left */
+    s->bitbuf = (int32_t)(val >> need);
+    s->bitcnt -= need;
+
+    /* return need bits, zeroing the bits above that */
+    return (int32_t)(val & ((1L << need) - 1));
+}
+
+/*
+ * Process a stored block.
+ *
+ * Format notes:
+ *
+ * - After the two-bit stored block type (00), the stored block length and
+ *   stored bytes are byte-aligned for fast copying.  Therefore any leftover
+ *   bits in the byte that has the last bit of the type, as many as seven, are
+ *   discarded.  The values of the discarded bits are not defined and should not
+ *   be checked against any expectation.
+ *
+ * - The second inverted copy of the stored block length does not have to be
+ *   checked, but it's probably a good idea to do so anyway.
+ *
+ * - A stored block can have zero length.  This is sometimes used to byte-align
+ *   subsets of the compressed data for random access or partial recovery.
+ */
+local int32_t stored(struct state *s)
+{
+    uint32_t len;       /* length of stored block (16 bits, low byte first) */
+
+    /* discard leftover bits from current byte (assumes s->bitcnt < 8) */
+    s->bitbuf = 0;
+    s->bitcnt = 0;
+
+    /* get length and check against its one's complement */
+    if (s->incnt + 4 > s->inlen) return 2;      /* not enough input */
+    len = s->in[s->incnt++];                    /* low byte of length */
+    len |= s->in[s->incnt++] << 8;              /* high byte of length */
+    if (s->in[s->incnt++] != (~len & 0xff) ||
+        s->in[s->incnt++] != ((~len >> 8) & 0xff))
+        return -2;                              /* didn't match complement! */
+
+    /* copy len bytes from in to out */
+    if (s->incnt + len > s->inlen) return 2;    /* not enough input */
+    if (s->out != NULL) {
+        if (s->outcnt + len > s->outlen)
+            return 1;                           /* not enough output space */
+        while (len--)
+            s->out[s->outcnt++] = s->in[s->incnt++];
+    }
+    else {                                      /* just scanning */
+        s->outcnt += len;                       /* count the bytes without copying */
+        s->incnt += len;
+    }
+
+    /* done with a valid stored block */
+    return 0;
+}
+
+/*
+ * Huffman code decoding tables.  count[1..MAXBITS] is the number of symbols of
+ * each length, which for a canonical code are stepped through in order.
+ * symbol[] are the symbol values in canonical order, where the number of
+ * entries is the sum of the counts in count[].  The decoding process can be
+ * seen in the function decode() below.
+ */
+struct huffman {
+    int16_t *count;       /* count[len]: number of symbols of bit length len */
+    int16_t *symbol;      /* symbols sorted by length, then by symbol value */
+};
+
+/*
+ * Decode a code from the stream s using huffman table h.  Return the symbol or
+ * a negative value if there is an error.  If all of the lengths are zero, i.e.
+ * an empty code, or if the code is incomplete and an invalid code is received,
+ * then -9 is returned after reading MAXBITS bits.
+ *
+ * Format notes:
+ *
+ * - The codes as stored in the compressed data are bit-reversed relative to
+ *   a simple integer ordering of codes of the same lengths.  Hence below the
+ *   bits are pulled from the compressed data one at a time and used to
+ *   build the code value reversed from what is in the stream in order to
+ *   permit simple integer comparisons for decoding.  A table-based decoding
+ *   scheme (as used in zlib) does not need to do this reversal.
+ *
+ * - The first code for the shortest length is all zeros.  Subsequent codes of
+ *   the same length are simply integer increments of the previous code.  When
+ *   moving up a length, a zero bit is appended to the code.  For a complete
+ *   code, the last code of the longest length will be all ones.
+ *
+ * - Incomplete codes are handled by this decoder, since they are permitted
+ *   in the deflate format.  See the format notes for fixed() and dynamic().
+ */
+local int32_t decode(struct state *s, struct huffman *h)
+{
+    int32_t len;            /* current number of bits in code */
+    int32_t code;           /* len bits being decoded */
+    int32_t first;          /* first code of length len */
+    int32_t count;          /* number of codes of length len */
+    int32_t index;          /* index of first code of length len in symbol table */
+    int32_t bitbuf;         /* bits from stream */
+    int32_t left;           /* bits left in next or left to process */
+    int16_t *next;        /* next number of codes */
+
+    bitbuf = s->bitbuf;                 /* begin with the bits already buffered */
+    left = s->bitcnt;
+    code = first = index = 0;
+    len = 1;
+    next = h->count + 1;                /* skip count[0]; code lengths start at 1 */
+    while (1) {
+        while (left--) {
+            code |= bitbuf & 1;         /* next stream bit becomes the low bit of code */
+            bitbuf >>= 1;
+            count = *next++;
+            if (code < first + count) { /* if length len, return symbol */
+                s->bitbuf = bitbuf;
+                s->bitcnt = (s->bitcnt - len) & 7;  /* unused bits left in the last byte read */
+                return h->symbol[index + (code - first)];
+            }
+            index += count;             /* else update for next length */
+            first += count;
+            first <<= 1;
+            code <<= 1;
+            len++;
+        }
+        left = (MAXBITS+1) - len;       /* bits still allowed before exceeding MAXBITS */
+        if (left == 0) break;
+        if (s->incnt == s->inlen) longjmp(s->env, 1);   /* out of input */
+        bitbuf = s->in[s->incnt++];
+        if (left > 8) left = 8;         /* one input byte supplies at most eight bits */
+    }
+    return -9;                          /* ran out of codes */
+}
+
+/*
+ * Given the list of code lengths length[0..n-1] representing a canonical
+ * Huffman code for n symbols, construct the tables required to decode those
+ * codes.  Those tables are the number of codes of each length, and the symbols
+ * sorted by length, retaining their original order within each length.  The
+ * return value is zero for a complete code set, negative for an over-
+ * subscribed code set, and positive for an incomplete code set.  The tables
+ * can be used if the return value is zero or positive, but they cannot be used
+ * if the return value is negative.  If the return value is zero, it is not
+ * possible for decode() using that table to return an error--any stream of
+ * enough bits will resolve to a symbol.  If the return value is positive, then
+ * it is possible for decode() using that table to return an error for received
+ * codes past the end of the incomplete lengths.
+ *
+ * Not used by decode(), but used for error checking, h->count[0] is the number
+ * of the n symbols not in the code.  So n - h->count[0] is the number of
+ * codes.  This is useful for checking for incomplete codes that have more than
+ * one symbol, which is an error in a dynamic block.
+ *
+ * Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS
+ * This is assured by the construction of the length arrays in dynamic() and
+ * fixed() and is not verified by construct().
+ *
+ * Format notes:
+ *
+ * - Permitted and expected examples of incomplete codes are one of the fixed
+ *   codes and any code with a single symbol which in deflate is coded as one
+ *   bit instead of zero bits.  See the format notes for fixed() and dynamic().
+ *
+ * - Within a given code length, the symbols are kept in ascending order for
+ *   the code bits definition.
+ */
+local int32_t construct(struct huffman *h, int16_t *length, int32_t n)
+{
+    int32_t symbol;         /* current symbol when stepping through length[] */
+    int32_t len;            /* current length when stepping through h->count[] */
+    int32_t left;           /* number of possible codes left of current length */
+    int16_t offs[MAXBITS+1];      /* offsets in symbol table for each length */
+
+    /* count number of codes of each length */
+    for (len = 0; len <= MAXBITS; len++)
+        h->count[len] = 0;
+    for (symbol = 0; symbol < n; symbol++)
+        (h->count[length[symbol]])++;   /* assumes lengths are within bounds */
+    if (h->count[0] == n)               /* no codes! */
+        return 0;                       /* complete, but decode() will fail */
+
+    /* check for an over-subscribed or incomplete set of lengths */
+    left = 1;                           /* one possible code of zero length */
+    for (len = 1; len <= MAXBITS; len++) {
+        left <<= 1;                     /* one more bit, double codes left */
+        left -= h->count[len];          /* deduct count from possible codes */
+        if (left < 0) return left;      /* over-subscribed--return negative */
+    }                                   /* left > 0 means incomplete */
+
+    /* generate offsets into symbol table for each length for sorting */
+    offs[1] = 0;                        /* symbols of length one start the table */
+    for (len = 1; len < MAXBITS; len++)
+        offs[len + 1] = offs[len] + h->count[len];
+
+    /*
+     * put symbols in table sorted by length, by symbol order within each
+     * length
+     */
+    for (symbol = 0; symbol < n; symbol++)
+        if (length[symbol] != 0)        /* length zero means the symbol has no code */
+            h->symbol[offs[length[symbol]]++] = symbol;
+
+    /* return zero for complete set, positive for incomplete set */
+    return left;
+}
+
+/*
+ * Decode literal/length and distance codes until an end-of-block code.
+ *
+ * Format notes:
+ *
+ * - Compressed data that is after the block type if fixed or after the code
+ *   description if dynamic is a combination of literals and length/distance
+ *   pairs terminated by an end-of-block code.  Literals are simply Huffman
+ *   coded bytes.  A length/distance pair is a coded length followed by a
+ *   coded distance to represent a string that occurs earlier in the
+ *   uncompressed data that occurs again at the current location.
+ *
+ * - Literals, lengths, and the end-of-block code are combined into a single
+ *   code of up to 286 symbols.  They are 256 literals (0..255), 29 length
+ *   symbols (257..285), and the end-of-block symbol (256).
+ *
+ * - There are 256 possible lengths (3..258), and so 29 symbols are not enough
+ *   to represent all of those.  Lengths 3..10 and 258 are in fact represented
+ *   by just a length symbol.  Lengths 11..257 are represented as a symbol and
+ *   some number of extra bits that are added as an integer to the base length
+ *   of the length symbol.  The number of extra bits is determined by the base
+ *   length symbol.  These are in the static arrays below, lens[] for the base
+ *   lengths and lext[] for the corresponding number of extra bits.
+ *
+ * - The reason that 258 gets its own symbol is that the longest length is used
+ *   often in highly redundant files.  Note that 258 can also be coded as the
+ *   base value 227 plus the maximum extra value of 31.  While a good deflate
+ *   should never do this, it is not an error, and should be decoded properly.
+ *
+ * - If a length is decoded, including its extra bits if any, then it is
+ *   followed by a distance code.  There are up to 30 distance symbols.  Again
+ *   there are many more possible distances (1..32768), so extra bits are added
+ *   to a base value represented by the symbol.  The distances 1..4 get their
+ *   own symbol, but the rest require extra bits.  The base distances and
+ *   corresponding number of extra bits are below in the static arrays dist[]
+ *   and dext[].
+ *
+ * - Literal bytes are simply written to the output.  A length/distance pair is
+ *   an instruction to copy previously uncompressed bytes to the output.  The
+ *   copy is from distance bytes back in the output stream, copying for length
+ *   bytes.
+ *
+ * - Distances pointing before the beginning of the output data are not
+ *   permitted.
+ *
+ * - Overlapped copies, where the length is greater than the distance, are
+ *   allowed and common.  For example, a distance of one and a length of 258
+ *   simply copies the last byte 258 times.  A distance of four and a length of
+ *   twelve copies the last four bytes three times.  A simple forward copy
+ *   ignoring whether the length is greater than the distance or not implements
+ *   this correctly.  You should not use memcpy() since its behavior is not
+ *   defined for overlapped arrays.  You should not use memmove() or bcopy()
+ *   since though their behavior -is- defined for overlapping arrays, it is
+ *   defined to do the wrong thing in this case.
+ */
+local int32_t codes(struct state *s,
+                struct huffman *lencode,
+                struct huffman *distcode)
+{
+    int32_t symbol;         /* decoded symbol */
+    int32_t len;            /* length for copy */
+    uint32_t dist;          /* distance for copy */
+    static const int16_t lens[29] = { /* Size base for length codes 257..285 */
+        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258};
+    static const int16_t lext[29] = { /* Extra bits for length codes 257..285 */
+        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+        3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
+    static const int16_t dists[30] = { /* Offset base for distance codes 0..29 */
+        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+        8193, 12289, 16385, 24577};
+    static const int16_t dext[30] = { /* Extra bits for distance codes 0..29 */
+        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+        7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+        12, 12, 13, 13};
+
+    /* decode literals and length/distance pairs */
+    do {
+        symbol = decode(s, lencode);
+        if (symbol < 0) return symbol;  /* invalid symbol */
+        if (symbol < 256) {             /* literal: symbol is the byte */
+            /* write out the literal */
+            if (s->out != NULL) {
+                if (s->outcnt == s->outlen) return 1;   /* output buffer is full */
+                s->out[s->outcnt] = symbol;
+            }
+            s->outcnt++;                /* counted even when only scanning */
+        }
+        else if (symbol > 256) {        /* length */
+            /* get and compute length */
+            symbol -= 257;              /* turn symbol into an index for lens[]/lext[] */
+            if (symbol >= 29) return -9;        /* invalid fixed code */
+            len = lens[symbol] + bits(s, lext[symbol]);
+
+            /* get and check distance */
+            symbol = decode(s, distcode);
+            if (symbol < 0) return symbol;      /* invalid symbol */
+            dist = dists[symbol] + bits(s, dext[symbol]);
+            if (dist > s->outcnt)
+                return -10;     /* distance too far back */
+
+            /* copy length bytes from distance bytes back */
+            if (s->out != NULL) {
+                if (s->outcnt + len > s->outlen) return 1;  /* not enough output space */
+                while (len--) {         /* byte-by-byte so overlapping copies repeat data */
+                    s->out[s->outcnt] = s->out[s->outcnt - dist];
+                    s->outcnt++;
+                }
+            }
+            else
+                s->outcnt += len;       /* just scanning: count the bytes only */
+        }
+    } while (symbol != 256);            /* end of block symbol */
+
+    /* done with a valid fixed or dynamic block */
+    return 0;
+}
+
+/*
+ * Process a fixed codes block.
+ *
+ * Format notes:
+ *
+ * - This block type can be useful for compressing small amounts of data for
+ *   which the size of the code descriptions in a dynamic block exceeds the
+ *   benefit of custom codes for that block.  For fixed codes, no bits are
+ *   spent on code descriptions.  Instead the code lengths for literal/length
+ *   codes and distance codes are fixed.  The specific lengths for each symbol
+ *   can be seen in the "for" loops below.
+ *
+ * - The literal/length code is complete, but has two symbols that are invalid
+ *   and should result in an error if received.  This cannot be implemented
+ *   simply as an incomplete code since those two symbols are in the "middle"
+ *   of the code.  They are eight bits long and the longest literal/length
+ *   code is nine bits.  Therefore the code must be constructed with those
+ *   symbols, and the invalid symbols must be detected after decoding.
+ *
+ * - The fixed distance codes also have two invalid symbols that should result
+ *   in an error if received.  Since all of the distance codes are the same
+ *   length, this can be implemented as an incomplete code.  Then the invalid
+ *   codes are detected while decoding.
+ */
+local int32_t fixed(struct state *s)
+{
+    static int32_t virgin = 1;          /* nonzero until the fixed tables are built */
+    static int16_t lencnt[MAXBITS+1], lensym[FIXLCODES];
+    static int16_t distcnt[MAXBITS+1], distsym[MAXDCODES];
+    static struct huffman lencode = {lencnt, lensym};
+    static struct huffman distcode = {distcnt, distsym};
+
+    /* build fixed huffman tables if first call (may not be thread safe) */
+    if (virgin) {
+        int32_t symbol;
+        int16_t lengths[FIXLCODES];     /* scratch code lengths, reused for both tables */
+
+        /* literal/length table */
+        for (symbol = 0; symbol < 144; symbol++)
+            lengths[symbol] = 8;
+        for (; symbol < 256; symbol++)
+            lengths[symbol] = 9;
+        for (; symbol < 280; symbol++)
+            lengths[symbol] = 7;
+        for (; symbol < FIXLCODES; symbol++)
+            lengths[symbol] = 8;
+        construct(&lencode, lengths, FIXLCODES);    /* return value deliberately unused */
+
+        /* distance table */
+        for (symbol = 0; symbol < MAXDCODES; symbol++)
+            lengths[symbol] = 5;
+        construct(&distcode, lengths, MAXDCODES);   /* incomplete: 30 of 32 five-bit codes */
+
+        /* do this just once */
+        virgin = 0;
+    }
+
+    /* decode data until end-of-block code */
+    return codes(s, &lencode, &distcode);
+}
+
+/*
+ * Process a dynamic codes block.
+ *
+ * Format notes:
+ *
+ * - A dynamic block starts with a description of the literal/length and
+ *   distance codes for that block.  New dynamic blocks allow the compressor to
+ *   rapidly adapt to changing data with new codes optimized for that data.
+ *
+ * - The codes used by the deflate format are "canonical", which means that
+ *   the actual bits of the codes are generated in an unambiguous way simply
+ *   from the number of bits in each code.  Therefore the code descriptions
+ *   are simply a list of code lengths for each symbol.
+ *
+ * - The code lengths are stored in order for the symbols, so lengths are
+ *   provided for each of the literal/length symbols, and for each of the
+ *   distance symbols.
+ *
+ * - If a symbol is not used in the block, this is represented by a zero as
+ *   the code length.  This does not mean a zero-length code, but rather
+ *   that no code should be created for this symbol.  There is no way in the
+ *   deflate format to represent a zero-length code.
+ *
+ * - The maximum number of bits in a code is 15, so the possible lengths for
+ *   any code are 1..15.
+ *
+ * - The fact that a length of zero is not permitted for a code has an
+ *   interesting consequence.  Normally if only one symbol is used for a given
+ *   code, then in fact that code could be represented with zero bits.  However
+ *   in deflate, that code has to be at least one bit.  So for example, if
+ *   only a single distance base symbol appears in a block, then it will be
+ *   represented by a single code of length one, in particular one 0 bit.  This
+ *   is an incomplete code, since if a 1 bit is received, it has no meaning,
+ *   and should result in an error.  So incomplete distance codes of one symbol
+ *   should be permitted, and the receipt of invalid codes should be handled.
+ *
+ * - It is also possible to have a single literal/length code, but that code
+ *   must be the end-of-block code, since every dynamic block has one.  This
+ *   is not the most efficient way to create an empty block (an empty fixed
+ *   block is fewer bits), but it is allowed by the format.  So incomplete
+ *   literal/length codes of one symbol should also be permitted.
+ *
+ * - If there are only literal codes and no lengths, then there are no distance
+ *   codes.  This is represented by one distance code with zero bits.
+ *
+ * - The list of up to 286 length/literal lengths and up to 30 distance lengths
+ *   are themselves compressed using Huffman codes and run-length encoding.  In
+ *   the list of code lengths, a 0 symbol means no code, a 1..15 symbol means
+ *   that length, and the symbols 16, 17, and 18 are run-length instructions.
+ *   Each of 16, 17, and 18 is followed by extra bits to define the length of
+ *   the run.  16 copies the last length 3 to 6 times.  17 represents 3 to 10
+ *   zero lengths, and 18 represents 11 to 138 zero lengths.  Unused symbols
+ *   are common, hence the special coding for zero lengths.
+ *
+ * - The symbols for 0..18 are Huffman coded, and so that code must be
+ *   described first.  This is simply a sequence of up to 19 three-bit values
+ *   representing no code (0) or the code length for that symbol (1..7).
+ *
+ * - A dynamic block starts with three fixed-size counts from which is computed
+ *   the number of literal/length code lengths, the number of distance code
+ *   lengths, and the number of code length code lengths (ok, you come up with
+ *   a better name!) in the code descriptions.  For the literal/length and
+ *   distance codes, lengths after those provided are considered zero, i.e. no
+ *   code.  The code length code lengths are received in a permuted order (see
+ *   the order[] array below) to make a short code length code length list more
+ *   likely.  As it turns out, very short and very long codes are less likely
+ *   to be seen in a dynamic code description, hence what may appear initially
+ *   to be a peculiar ordering.
+ *
+ * - Given the number of literal/length code lengths (nlen) and distance code
+ *   lengths (ndist), then they are treated as one long list of nlen + ndist
+ *   code lengths.  Therefore run-length coding can and often does cross the
+ *   boundary between the two sets of lengths.
+ *
+ * - So to summarize, the code description at the start of a dynamic block is
+ *   three counts for the number of code lengths for the literal/length codes,
+ *   the distance codes, and the code length codes.  This is followed by the
+ *   code length code lengths, three bits each.  This is used to construct the
+ *   code length code which is used to read the remainder of the lengths.  Then
+ *   the literal/length code lengths and distance lengths are read as a single
+ *   set of lengths using the code length codes.  Codes are constructed from
+ *   the resulting two sets of lengths, and then finally you can start
+ *   decoding actual compressed data in the block.
+ *
+ * - For reference, a "typical" size for the code description in a dynamic
+ *   block is around 80 bytes.
+ */
+local int32_t dynamic(struct state *s)
+{
+    int32_t nlen, ndist, ncode;             /* number of lengths in descriptor */
+    int32_t index;                          /* index of lengths[] */
+    int32_t err;                            /* construct() return value */
+    int16_t lengths[MAXCODES];            /* descriptor code lengths */
+    int16_t lencnt[MAXBITS+1], lensym[MAXLCODES];         /* lencode memory */
+    int16_t distcnt[MAXBITS+1], distsym[MAXDCODES];       /* distcode memory */
+    struct huffman lencode = {lencnt, lensym};          /* length code */
+    struct huffman distcode = {distcnt, distsym};       /* distance code */
+    static const int16_t order[19] =      /* permutation of code length codes */
+        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+    /* get number of lengths in each table, check lengths */
+    nlen = bits(s, 5) + 257;
+    ndist = bits(s, 5) + 1;
+    ncode = bits(s, 4) + 4;
+    if (nlen > MAXLCODES || ndist > MAXDCODES)
+        return -3;                      /* bad counts */
+
+    /* read code length code lengths (really), missing lengths are zero */
+    for (index = 0; index < ncode; index++)
+        lengths[order[index]] = bits(s, 3);
+    for (; index < 19; index++)
+        lengths[order[index]] = 0;
+
+    /* build huffman table for code lengths codes (use lencode temporarily) */
+    err = construct(&lencode, lengths, 19);
+    if (err != 0) return -4;            /* require complete code set here */
+
+    /* read length/literal and distance code length tables */
+    index = 0;
+    while (index < nlen + ndist) {
+        int32_t symbol, len;        /* decoded value; last length to repeat */
+
+        symbol = decode(s, &lencode);
+        if (symbol < 0) return symbol;  /* decode error: never store it as a length */
+        if (symbol < 16)                /* length in 0..15 */
+            lengths[index++] = symbol;
+        else {                          /* repeat instruction */
+            len = 0;                    /* assume repeating zeros */
+            if (symbol == 16) {         /* repeat last length 3..6 times */
+                if (index == 0) return -5;      /* no last length! */
+                len = lengths[index - 1];       /* last length */
+                symbol = 3 + bits(s, 2);
+            }
+            else if (symbol == 17)      /* repeat zero 3..10 times */
+                symbol = 3 + bits(s, 3);
+            else                        /* == 18, repeat zero 11..138 times */
+                symbol = 11 + bits(s, 7);
+            if (index + symbol > nlen + ndist)
+                return -6;              /* too many lengths! */
+            while (symbol--)            /* repeat last or zero symbol times */
+                lengths[index++] = len;
+        }
+    }
+
+    /* build huffman table for literal/length codes */
+    err = construct(&lencode, lengths, nlen);
+    if (err < 0 || (err > 0 && nlen - lencode.count[0] != 1))
+        return -7;      /* only allow incomplete codes if just one code */
+
+    /* build huffman table for distance codes */
+    err = construct(&distcode, lengths + nlen, ndist);
+    if (err < 0 || (err > 0 && ndist - distcode.count[0] != 1))
+        return -8;      /* only allow incomplete codes if just one code */
+
+    /* decode data until end-of-block code */
+    return codes(s, &lencode, &distcode);
+}
+
+/*
+ * Inflate source to dest.  On return, destlen and sourcelen are updated to the
+ * size of the uncompressed data and the size of the deflate data respectively.
+ * On success, the return value of puff() is zero.  If there is an error in the
+ * source data, i.e. it is not in the deflate format, then a negative value is
+ * returned.  If there is not enough input available or there is not enough
+ * output space, then a positive error is returned.  In that case, destlen and
+ * sourcelen are not updated to facilitate retrying from the beginning with the
+ * provision of more input data or more output space.  In the case of invalid
+ * inflate data (a negative error), destlen and sourcelen are still updated to
+ * facilitate the debugging of deflators.
+ *
+ * puff() also has a mode to determine the size of the uncompressed output with
+ * no output written.  For this dest must be (uint8_t *)0.  In this case,
+ * the input value of *destlen is ignored, and on return *destlen is set to the
+ * size of the uncompressed output.
+ *
+ * The return codes are:
+ *
+ *   2:  available inflate data did not terminate
+ *   1:  output space exhausted before completing inflate
+ *   0:  successful inflate
+ *  -1:  invalid block type (type == 3)
+ *  -2:  stored block length did not match one's complement
+ *  -3:  dynamic block code description: too many length or distance codes
+ *  -4:  dynamic block code description: code lengths codes incomplete
+ *  -5:  dynamic block code description: repeat lengths with no first length
+ *  -6:  dynamic block code description: repeat more than specified lengths
+ *  -7:  dynamic block code description: invalid literal/length code lengths
+ *  -8:  dynamic block code description: invalid distance code lengths
+ *  -9:  invalid literal/length or distance code in fixed or dynamic block
+ * -10:  distance is too far back in fixed or dynamic block
+ *
+ * Format notes:
+ *
+ * - Three bits are read for each block to determine the kind of block and
+ *   whether or not it is the last block.  Then the block is decoded and the
+ *   process repeated if it was not the last block.
+ *
+ * - The leftover bits in the last byte of the deflate data after the last
+ *   block (if it was a fixed or dynamic block) are undefined and have no
+ *   expected values to check.
+ */
+int32_t puff(uint8_t  *dest,           /* output buffer, or NULL to only measure */
+             uint32_t *destlen,        /* in: output space, out: uncompressed size */
+             uint8_t  *source,         /* deflate data to inflate */
+             uint32_t *sourcelen)      /* in: input available, out: deflate data size */
+{
+    struct state s;             /* input/output state */
+    int32_t last, type;             /* block information */
+    int32_t err;                    /* return value */
+
+    /* initialize output state */
+    s.out = dest;
+    s.outlen = *destlen;                /* ignored if dest is NULL */
+    s.outcnt = 0;
+
+    /* initialize input state */
+    s.in = source;
+    s.inlen = *sourcelen;
+    s.incnt = 0;
+    s.bitbuf = 0;
+    s.bitcnt = 0;
+
+    /* return if bits() or decode() tries to read past available input */
+    if (setjmp(s.env) != 0)             /* if came back here via longjmp() */
+        err = 2;                        /* then skip do-loop, return error */
+    else {
+        /* process blocks until last block or error */
+        do {
+            last = bits(&s, 1);         /* one if last block */
+            type = bits(&s, 2);         /* block type 0..3 */
+            err = type == 0 ? stored(&s) :
+                  (type == 1 ? fixed(&s) :
+                   (type == 2 ? dynamic(&s) :
+                    -1));               /* type == 3, invalid */
+            if (err != 0) break;        /* return with error */
+        } while (!last);
+    }
+
+    /* report the lengths on success or invalid data (err <= 0), then return */
+    if (err <= 0) {
+        *destlen = s.outcnt;
+        *sourcelen = s.incnt;
+    }
+    return err;
+}
diff --git a/src/qcommon/puff.h b/src/qcommon/puff.h
new file mode 100644
index 00000000..14070f64
--- /dev/null
+++ b/src/qcommon/puff.h
@@ -0,0 +1,43 @@
+/*
+ *  This is a modified version of Mark Adler's work,
+ *  see below for the original copyright.
+ *  2006 - Joerg Dietrich <dietrich_joerg@gmx.de>
+ */
+
+/* puff.h
+  Copyright (C) 2002, 2003 Mark Adler, all rights reserved
+  version 1.7, 3 Mar 2002
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the author be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Mark Adler    madler@alumni.caltech.edu
+ */
+
+#ifndef __PUFF_H
+#define __PUFF_H
+
+#include "q_shared.h"			/* for definitions of the <stdint.h> types */
+
+/*
+ * See puff.c for purpose and usage.
+ */
+int32_t puff(uint8_t  *dest,		/* output buffer, or NULL to only measure */
+             uint32_t *destlen,		/* in: output space, out: uncompressed size */
+             uint8_t  *source,		/* deflate data to inflate */
+             uint32_t *sourcelen);	/* in: input available, out: deflate data size */
+
+#endif // __PUFF_H
diff --git a/src/qcommon/q_math.c b/src/qcommon/q_math.c
index 196d2f55..0973f31c 100644
--- a/src/qcommon/q_math.c
+++ b/src/qcommon/q_math.c
@@ -1086,6 +1086,53 @@ void AddPointToBounds( const vec3_t v, vec3_t mins, vec3_t maxs ) {
 	}
 }
 
+qboolean BoundsIntersect(const vec3_t mins, const vec3_t maxs,
+		const vec3_t mins2, const vec3_t maxs2)
+{
+	if ( maxs[0] < mins2[0] ||
+		maxs[1] < mins2[1] ||
+		maxs[2] < mins2[2] ||
+		mins[0] > maxs2[0] ||
+		mins[1] > maxs2[1] ||
+		mins[2] > maxs2[2])
+	{
+		return qfalse;
+	}
+
+	return qtrue;
+}
+
+qboolean BoundsIntersectSphere(const vec3_t mins, const vec3_t maxs,
+		const vec3_t origin, vec_t radius)
+{
+	if ( origin[0] - radius > maxs[0] ||
+		origin[0] + radius < mins[0] ||
+		origin[1] - radius > maxs[1] ||
+		origin[1] + radius < mins[1] ||
+		origin[2] - radius > maxs[2] ||
+		origin[2] + radius < mins[2])
+	{
+		return qfalse;
+	}
+
+	return qtrue;
+}
+
+qboolean BoundsIntersectPoint(const vec3_t mins, const vec3_t maxs,
+		const vec3_t origin)
+{
+	if ( origin[0] > maxs[0] ||
+		origin[0] < mins[0] ||
+		origin[1] > maxs[1] ||
+		origin[1] < mins[1] ||
+		origin[2] > maxs[2] ||
+		origin[2] < mins[2])
+	{
+		return qfalse;
+	}
+
+	return qtrue;
+}
 
 vec_t VectorNormalize( vec3_t v ) {
 	// NOTE: TTimo - Apple G4 altivec source uses double?
diff --git a/src/qcommon/q_shared.h b/src/qcommon/q_shared.h
index 50a0b10c..2e3a153c 100644
--- a/src/qcommon/q_shared.h
+++ b/src/qcommon/q_shared.h
@@ -559,7 +559,7 @@ vec_t VectorLengthSquared( const vec3_t v );
 vec_t Distance( const vec3_t p1, const vec3_t p2 );
 
 vec_t DistanceSquared( const vec3_t p1, const vec3_t p2 );
- 
+
 void VectorNormalizeFast( vec3_t v );
 
 void VectorInverse( vec3_t v );
@@ -593,6 +593,13 @@ void AxisCopy( vec3_t in[3], vec3_t out[3] );
 void SetPlaneSignbits( struct cplane_s *out );
 int BoxOnPlaneSide (vec3_t emins, vec3_t emaxs, struct cplane_s *plane);
 
+qboolean BoundsIntersect(const vec3_t mins, const vec3_t maxs,
+		const vec3_t mins2, const vec3_t maxs2);
+qboolean BoundsIntersectSphere(const vec3_t mins, const vec3_t maxs,
+		const vec3_t origin, vec_t radius);
+qboolean BoundsIntersectPoint(const vec3_t mins, const vec3_t maxs,
+		const vec3_t origin);
+
 float	AngleMod(float a);
 float	LerpAngle (float from, float to, float frac);
 float	AngleSubtract( float a1, float a2 );
diff --git a/src/qcommon/vm_x86.c b/src/qcommon/vm_x86.c
index c0a703bc..d298b755 100644
--- a/src/qcommon/vm_x86.c
+++ b/src/qcommon/vm_x86.c
@@ -213,7 +213,7 @@ void callAsmCall(void)
 // arbitrarily named (though this is not true for the MSC version).  When a vm
 // makes a system call, control jumps straight to the doAsmCall label.
 void AsmCall( void ) {
-	asm( CMANG(doAsmCall) ":				\n\t" \
+	__asm__( CMANG(doAsmCall) ":				\n\t" \
 		"	movl (%%edi),%%eax			\n\t" \
 		"	subl $4,%%edi				\n\t" \
 		"	orl %%eax,%%eax				\n\t" \
diff --git a/src/qcommon/vm_x86_64.c b/src/qcommon/vm_x86_64.c
index e8e827e5..814acfef 100644
--- a/src/qcommon/vm_x86_64.c
+++ b/src/qcommon/vm_x86_64.c
@@ -29,9 +29,15 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <sys/time.h>
+#include <time.h>
 #include <fcntl.h>
 #include <errno.h>
 #include <unistd.h>
+#include <stdarg.h>
+
+//#define USE_GAS
+//#define DEBUG_VM
 
 #ifdef DEBUG_VM
 #define Dfprintf(fd, args...) fprintf(fd, ##args)
@@ -40,6 +46,19 @@ static FILE* qdasmout;
 #define Dfprintf(args...)
 #endif
 
+#define VM_X86_64_MMAP
+
+#ifndef USE_GAS
+void assembler_set_output(char* buf);
+size_t assembler_get_code_size(void);
+void assembler_init(int pass);
+void assemble_line(const char* input, size_t len);
+#ifdef Dfprintf
+#undef Dfprintf
+#define Dfprintf(args...)
+#endif
+#endif // USE_GAS
+
 static void VM_Destroy_Compiled(vm_t* self);
 
 /*
@@ -207,8 +226,29 @@ static unsigned char op_argsize[256] =
 	[OP_BLOCK_COPY] = 4,
 };
 
+#ifdef USE_GAS
 #define emit(x...) \
 	do { fprintf(fh_s, ##x); fputc('\n', fh_s); } while(0)
+#else
+void emit(const char* fmt, ...)
+{
+	va_list ap;
+	char line[4096];
+	va_start(ap, fmt);
+	vsnprintf(line, sizeof(line), fmt, ap);
+	va_end(ap);
+	assemble_line(line, strlen(line));
+}
+#endif // USE_GAS
+
+#ifdef USE_GAS
+#define JMPIARG \
+	emit("jmp i_%08x", iarg);
+#else /* emit() is printf-style here: a literal '%' must be written as "%%" */
+#define JMPIARG \
+	emit("movq $%lu, %%rax", (unsigned long)(vm->codeBase+vm->instructionPointers[iarg])); \
+	emit("jmpq *%%rax");
+#endif
  
 // integer compare and jump
 #define IJ(op) \
@@ -216,7 +256,8 @@ static unsigned char op_argsize[256] =
 	emit("movl 4(%%rsi), %%eax"); \
 	emit("cmpl 8(%%rsi), %%eax"); \
 	emit(op " i_%08x", instruction+1); \
-	emit("jmp i_%08x", iarg);
+	JMPIARG \
+	neednilabel = 1;
 
 #ifdef USE_X87
 #define FJ(bits, op) \
@@ -226,7 +267,8 @@ static unsigned char op_argsize[256] =
 	emit("fnstsw %%ax");\
 	emit("testb $" #bits ", %%ah");\
 	emit(op " i_%08x", instruction+1);\
-	emit("jmp i_%08x", iarg);
+	JMPIARG \
+	neednilabel = 1;
 #define XJ(x)
 #else
 #define FJ(x, y)
@@ -236,7 +278,8 @@ static unsigned char op_argsize[256] =
 	emit("ucomiss 8(%%rsi), %%xmm0");\
 	emit("jp i_%08x", instruction+1);\
 	emit(op " i_%08x", instruction+1);\
-	emit("jmp i_%08x", iarg);
+	JMPIARG \
+	neednilabel = 1;
 #endif
 
 #define SIMPLE(op) \
@@ -293,9 +336,14 @@ static unsigned char op_argsize[256] =
 
 static void* getentrypoint(vm_t* vm)
 {
+#ifdef USE_GAS
        return vm->codeBase+64; // skip ELF header
+#else
+       return vm->codeBase;
+#endif // USE_GAS
 }
 
+#ifdef USE_GAS
 char* mmapfile(const char* fn, size_t* size)
 {
 	int fd = -1;
@@ -383,6 +431,7 @@ static int doas(char* in, char* out, unsigned char** compiledcode)
 
 	return size;
 }
+#endif // USE_GAS
 
 static void block_copy_vm(unsigned dest, unsigned src, unsigned count)
 {
@@ -411,8 +460,13 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
 	char* code;
 	unsigned iarg = 0;
 	unsigned char barg = 0;
-	void* entryPoint;
+	int neednilabel = 0;
+	struct timeval tvstart =  {0, 0};
 
+#ifdef USE_GAS
+	byte* compiledcode;
+	int   compiledsize;
+	void* entryPoint;
 	char fn_s[2*MAX_QPATH]; // output file for assembler code
 	char fn_o[2*MAX_QPATH]; // file written by as
 #ifdef DEBUG_VM
@@ -420,16 +474,16 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
 #endif
 	FILE* fh_s;
 	int fd_s, fd_o;
-	byte* compiledcode;
-	int   compiledsize;
+
+	gettimeofday(&tvstart, NULL);
 
 	Com_Printf("compiling %s\n", vm->name);
 
 #ifdef DEBUG_VM
 	snprintf(fn_s, sizeof(fn_s), "%.63s.s", vm->name);
 	snprintf(fn_o, sizeof(fn_o), "%.63s.o", vm->name);
-	fd_s = open(fn_s, O_CREAT|O_WRONLY, 0644);
-	fd_o = open(fn_o, O_CREAT|O_WRONLY, 0644);
+	fd_s = open(fn_s, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+	fd_o = open(fn_o, O_CREAT|O_WRONLY|O_TRUNC, 0644);
 #else
 	snprintf(fn_s, sizeof(fn_s), "/tmp/%.63s.s_XXXXXX", vm->name);
 	snprintf(fn_o, sizeof(fn_o), "/tmp/%.63s.o_XXXXXX", vm->name);
@@ -463,25 +517,50 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
 		return;
 	}
 
-	// translate all instructions
-	pc = 0;
-	code = (char *)header + header->codeOffset;
-
 	emit("start:");
 	emit("or %%r8, %%r8"); // check whether to set up instruction pointers
 	emit("jnz main");
 	emit("jmp setupinstructionpointers");
 
 	emit("main:");
+#else  // USE_GAS
+	int pass;
+	size_t compiledOfs = 0;
+
+	gettimeofday(&tvstart, NULL);
+
+	for (pass = 0; pass < 2; ++pass) {
+
+	if(pass)
+	{
+		compiledOfs = assembler_get_code_size();
+		vm->codeLength = compiledOfs;
+		vm->codeBase = mmap(NULL, compiledOfs, PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+		if(vm->codeBase == (void*)-1)
+			Com_Error(ERR_DROP, "VM_CompileX86: can't mmap memory");
+
+		assembler_set_output((char*)vm->codeBase);
+	}
+
+	assembler_init(pass);
+
+#endif // USE_GAS
+
+	// translate all instructions
+	pc = 0;
+	code = (char *)header + header->codeOffset;
 
 	for ( instruction = 0; instruction < header->instructionCount; ++instruction )
 	{
 		op = code[ pc ];
 		++pc;
 
-		vm->instructionPointers[instruction] = pc;
+#ifndef USE_GAS
+		vm->instructionPointers[instruction] = assembler_get_code_size();
+#endif
 
-#if 0
+		/* store current instruction number in r15 for debugging */
+#if 1
 		emit("nop");
 		emit("movq $%d, %%r15", instruction);
 		emit("nop");
@@ -502,7 +581,17 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
 		{
 			Dfprintf(qdasmout, "%s\n", opnames[op]);
 		}
+
+#ifdef USE_GAS
 		emit("i_%08x:", instruction);
+#else
+		if(neednilabel)
+		{
+			emit("i_%08x:", instruction);
+			neednilabel = 0;
+		}
+#endif
+
 		switch ( op )
 		{
 			case OP_UNDEF:
@@ -561,6 +650,7 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
 //				emit("frstor 4(%%rsi)");
 				emit("addq $4, %%rsi");
 				emit("movl %%eax, (%%rsi)"); // store return value
+				neednilabel = 1;
 				break;
 			case OP_PUSH:
 				emit("addq $4, %%rsi");
@@ -629,7 +719,8 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
 				emit("jp dojump_i_%08x", instruction);
 				emit("jz i_%08x", instruction+1);
 				emit("dojump_i_%08x:", instruction);
-				emit("jmp i_%08x", iarg);
+				JMPIARG
+				neednilabel = 1;
 #endif
 				break;
 			case OP_LTF:
@@ -856,7 +947,7 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
 		}
 	}
 
-
+#ifdef USE_GAS
 	emit("setupinstructionpointers:");
 	emit("movq $%lu, %%rax", (unsigned long)vm->instructionPointers);
 	for ( instruction = 0; instruction < header->instructionCount; ++instruction )
@@ -889,8 +980,17 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
 	vm->codeBase   = compiledcode; // remember to skip ELF header!
 	vm->codeLength = compiledsize;
 
+#else  // USE_GAS
+	}
+	assembler_init(0);
+
+	if(mprotect(vm->codeBase, compiledOfs, PROT_READ|PROT_EXEC))
+		Com_Error(ERR_DROP, "VM_CompileX86: mprotect failed");
+#endif // USE_GAS
+
 	vm->destroy = VM_Destroy_Compiled;
 	
+#ifdef USE_GAS
 	entryPoint = getentrypoint(vm);
 
 //	__asm__ __volatile__ ("int3");
@@ -911,8 +1011,6 @@ void VM_Compile( vm_t *vm, vmHeader_t *header ) {
 	fclose(qdasmout);
 #endif
 
-	Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n", vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength );
-
 out:
 	close(fd_o);
 
@@ -923,12 +1021,30 @@ out:
 		unlink(fn_s);
 	}
 #endif
+#endif // USE_GAS
+
+	if(vm->compiled)
+	{
+		struct timeval tvdone =  {0, 0};
+		struct timeval dur =  {0, 0};
+		Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n", vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength );
+
+		gettimeofday(&tvdone, NULL);
+		timersub(&tvdone, &tvstart, &dur);
+		Com_Printf( "compilation took %lu.%06lu seconds\n", dur.tv_sec, dur.tv_usec );
+	}
 }
 
 
 void VM_Destroy_Compiled(vm_t* self)
 {
+#ifdef USE_GAS
 	munmap(self->codeBase, self->codeLength);
+#elif _WIN32
+	VirtualFree(self->codeBase, self->codeLength, MEM_RELEASE);
+#else
+	munmap(self->codeBase, self->codeLength);
+#endif
 }
 
 /*
diff --git a/src/qcommon/vm_x86_64_assembler.c b/src/qcommon/vm_x86_64_assembler.c
new file mode 100644
index 00000000..1eda764f
--- /dev/null
+++ b/src/qcommon/vm_x86_64_assembler.c
@@ -0,0 +1,1419 @@
+/*
+===========================================================================
+vm_x86_64_assembler.c -- assembler for x86-64
+
+Copyright (C) 2007 Ludwig Nussel <ludwig.nussel@suse.de>, Novell inc.
+
+Quake III Arena source code is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Quake III Arena source code is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Quake III Arena source code; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+===========================================================================
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long u64;
+
+static char* out;
+static unsigned compiledOfs;
+static unsigned assembler_pass;
+
+static const char* cur_line;
+
+static FILE* fout;
+
+#define MIN(a,b)  ((a) < (b) ? (a) : (b))
+#define MAX(a,b)  ((a) > (b) ? (a) : (b))
+
+#define crap(fmt, args...) do { \
+	_crap(__FUNCTION__, fmt, ##args); \
+} while(0)
+
+#define CRAP_INVALID_ARGS crap("invalid arguments %s, %s", argtype2str(arg1.type),argtype2str(arg2.type));
+
+#ifdef DEBUG
+#define debug(fmt, args...) printf(fmt, ##args)
+#else
+#define debug(fmt, args...)
+#endif
+
+static void _crap(const char* func, const char* fmt, ...)
+{
+	va_list ap;
+	fprintf(stderr, "%s() - ", func);
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fputc('\n', stderr);
+	if(cur_line && cur_line[0])
+		fprintf(stderr, "-> %s\n", cur_line);
+	exit(1);
+}
+
+static void emit1(unsigned char v)
+{
+	if(assembler_pass)
+	{
+		out[compiledOfs++] = v;
+		if(fout) fwrite(&v, 1, 1, fout);
+		debug("%02hhx ", v);
+	}
+	else
+	{
+		++compiledOfs;
+	}
+}
+
+static inline void emit2(u16 v)
+{
+	emit1(v&0xFF);
+	emit1((v>>8)&0xFF);
+}
+
+static inline void emit4(u32 v)
+{
+	emit1(v&0xFF);
+	emit1((v>>8)&0xFF);
+	emit1((v>>16)&0xFF);
+	emit1((v>>24)&0xFF);
+}
+
+static inline void emit8(u64 v)
+{
+	emit4(v&0xFFFFFFFF);
+	emit4((v>>32)&0xFFFFFFFF);
+}
+
+enum {
+	REX_W = 0x08,
+	REX_R = 0x04,
+	REX_X = 0x02,
+	REX_B = 0x01,
+};
+
+enum {
+	MODRM_MOD_00 = 0x00,
+	MODRM_MOD_01 = 0x01 << 6,
+	MODRM_MOD_10 = 0x02 << 6,
+	MODRM_MOD_11 = 0x03 << 6,
+	MODRM_RM_SIB = 0x04,
+};
+
+typedef enum
+{
+	T_NONE      = 0x00,
+	T_REGISTER  = 0x01,
+	T_IMMEDIATE = 0x02,
+	T_MEMORY    = 0x04,
+	T_LABEL     = 0x08,
+	T_ABSOLUTE  = 0x80
+} argtype_t;
+
+typedef enum {
+	R_8   = 0x100, 
+	R_16  = 0x200, 
+	R_64  = 0x800, 
+	R_MSZ = 0xF00,  // size mask
+	R_XMM = 0x2000, // xmm register. year, sucks
+	R_EAX =  0x00,
+	R_EBX =  0x03,
+	R_ECX =  0x01,
+	R_EDX =  0x02,
+	R_ESI =  0x06,
+	R_EDI =  0x07,
+	R_ESP =  0x04,
+	R_RAX =  R_EAX | R_64,
+	R_RBX =  R_EBX | R_64,
+	R_RCX =  R_ECX | R_64,
+	R_RDX =  R_EDX | R_64,
+	R_RSI =  R_ESI | R_64,
+	R_RDI =  R_EDI | R_64,
+	R_RSP =  R_ESP | R_64,
+	R_R8  =  0x08  | R_64,
+	R_R9  =  0x09  | R_64,
+	R_R10 =  0x0A  | R_64,
+	R_R15 =  0x0F  | R_64,
+	R_AL  =  R_EAX | R_8,
+	R_AX  =  R_EAX | R_16,
+	R_CL  =  R_ECX | R_8,
+	R_XMM0 = 0x00  | R_XMM,
+	R_MGP =  0x0F, // mask for general purpose registers
+} reg_t;
+
+typedef enum {
+	MODRM_SIB = 0,
+	MODRM_NOSIB = 0x3,
+} modrm_sib_t;
+
+typedef struct {
+	unsigned disp;
+	argtype_t basetype;
+	union {
+		u64 imm;
+		reg_t reg;
+	} base;
+	argtype_t indextype;
+	union {
+		u64 imm;
+		reg_t reg;
+	} index;
+	unsigned scale;
+} memref_t;
+
+#define LABELLEN 32
+
+typedef struct {
+	argtype_t type;
+	union {
+		u64 imm;
+		reg_t reg;
+		memref_t mem;
+		char label[LABELLEN];
+	} v;
+	unsigned absolute:1; // unsigned: a signed 1-bit field can only hold 0 and -1
+} arg_t;
+
+typedef void (*emitfunc)(const char* op, arg_t arg1, arg_t arg2, void* data);
+
+typedef struct {
+	char* mnemonic;
+	emitfunc func;
+	void* data;
+} op_t;
+
+typedef struct {
+	u8 xmmprefix;
+	u8 subcode; // in modrm
+	u8 rmcode;  // opcode for reg/mem, reg
+	u8 mrcode;  // opcode for reg, reg/mem
+	u8 rcode8;  // opcode for reg8/mem8
+	u8 rcode;  // opcode for reg/mem
+} opparam_t;
+
+/* ************************* */
+
+static unsigned hashkey(const char *string, unsigned len) {
+	unsigned register hash, i;
+
+	hash = 0;
+	for (i = 0; i < len && string[i] != '\0'; ++i) {
+		hash += string[i] * (119 + i);
+	}
+	hash = (hash ^ (hash >> 10) ^ (hash >> 20));
+	return hash;
+}
+
+struct hashentry {
+	char* label;
+	unsigned address;
+	struct hashentry* next;
+};
+static struct hashentry* labelhash[1021];
+
+// prepend label to its hash chain; no dup check! aborts via crap() when out of memory
+static void hash_add_label(const char* label, unsigned address)
+{
+	struct hashentry* h;
+	unsigned i = hashkey(label, -1U);
+	i %= sizeof(labelhash)/sizeof(labelhash[0]);
+	h = malloc(sizeof(*h));
+	if(!h || !(h->label = strdup(label))) crap("out of memory"); // never use a NULL entry/label
+	h->address = address;
+	h->next = labelhash[i];
+	labelhash[i] = h;
+}
+
+static unsigned lookup_label(const char* label)
+{
+	struct hashentry* h;
+	unsigned i = hashkey(label, -1U);
+	i %= sizeof(labelhash)/sizeof(labelhash[0]);
+	for(h = labelhash[i]; h; h = h->next )
+	{
+		if(!strcmp(h->label, label))
+			return h->address;
+	}
+	if(assembler_pass)
+		crap("label %s undefined", label);
+	return 0;
+}
+
+static void labelhash_free(void)
+{
+	struct hashentry* h;
+	unsigned i;
+	unsigned z = 0, min = -1U, max = 0, t = 0;
+	for ( i = 0; i < sizeof(labelhash)/sizeof(labelhash[0]); ++i)
+	{
+		unsigned n = 0;
+		h = labelhash[i];
+		while(h)
+		{
+			struct hashentry* next = h->next;
+			free(h->label);
+			free(h);
+			h = next;
+			++n;
+		}
+		t+=n;
+		if(!n) ++z;
+		//else printf("%u\n", n);
+		min = MIN(min, n);
+		max = MAX(max, n);
+	}
+	printf("total %u, hsize %lu, zero %u, min %u, max %u\n", t, sizeof(labelhash)/sizeof(labelhash[0]), z, min, max);
+	memset(labelhash, 0, sizeof(labelhash));
+}
+
+/* ************************* */
+
+
+/* Human-readable name of an argument type for diagnostics; always returns a valid string. */
+static const char* argtype2str(argtype_t t)
+{
+	switch(t)
+	{
+		case T_NONE: return "none";
+		case T_REGISTER: return "register";
+		case T_IMMEDIATE: return "immediate";
+		case T_MEMORY: return "memory";
+		case T_LABEL: return "label";
+		default: crap("invalid type");
+	}
+	return "invalid"; /* only reached if crap() returns; this used to return the enum value T_NONE as a pointer */
+}
+
+/* ************************* */
+
+static inline int iss8(u64 v) // does v, read as a two's complement bit pattern, fit a signed byte?
+{
+	return (v + 0x80 <= 0xff); // -0x80..0x7f; labs(v) <= 0x80 also let +0x80 through, which encodes as -128
+}
+
+static inline int isu8(u64 v)
+{
+	return (v <= 0xff);
+}
+
+static inline int iss16(u64 v)
+{
+	return (v + 0x8000 <= 0xffff); // -0x8000..0x7fff, same off-by-one removed
+}
+
+static inline int isu16(u64 v)
+{
+	return (v <= 0xffff);
+}
+
+static inline int iss32(u64 v) // range kept at -0x80000000..+0x80000000: callers here apply it to 32 bit immediates
+{
+	return (v + 0x80000000u <= 0x100000000ULL); // no labs(): its long argument is only 32 bit on LLP64 targets
+}
+
+static inline int isu32(u64 v)
+{
+	return (v <= 0xffffffff);
+}
+
+/* Emit a bare one-byte instruction; the opcode value travels in the data pointer. Operands are not allowed. */
+static void emit_opsingle(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	const u8 opcode = (u8)((unsigned long) data);
+
+	if(!(arg1.type == T_NONE && arg2.type == T_NONE))
+		CRAP_INVALID_ARGS;
+	emit1(opcode);
+}
+
+static void emit_opsingle16(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	emit1(0x66); // operand-size override prefix; note it is written before emit_opsingle validates the arguments
+	emit_opsingle(mnemonic, arg1, arg2, data); // then the one-byte opcode carried in data
+}
+
+/* Build REX, ModRM and SIB: arg1 (if a register) fills ModRM.reg, arg2 (register or memory) fills ModRM.rm; *rex_r is 0 when no prefix is needed. */
+static void compute_rexmodrmsib(u8* rex_r, u8* modrm_r, u8* sib_r, arg_t* arg1, arg_t* arg2)
+{
+	u8 rex = 0;
+	u8 modrm = 0;
+	u8 sib = 0;
+
+	if((arg1->type == T_REGISTER && arg2->type == T_REGISTER)
+	&& ((arg1->v.reg & R_MSZ) != (arg2->v.reg & R_MSZ))
+	&& !((arg1->v.reg & R_XMM) || (arg2->v.reg & R_XMM)))
+		crap("both registers must be of same width");
+	/* any 64 bit register operand requests REX.W */
+	if((arg1->type == T_REGISTER && arg1->v.reg & R_64)
+	|| (arg2->type == T_REGISTER && arg2->v.reg & R_64))
+	{
+		rex |= REX_W;
+	}
+
+	if(arg1->type == T_REGISTER)
+	{
+		if((arg1->v.reg & R_MGP) > 0x07) // register numbers above 7 need the REX extension bit
+			rex |= REX_R;
+		modrm |= (arg1->v.reg & 0x07) << 3;
+	}
+
+	if(arg2->type == T_REGISTER)
+	{
+		if((arg2->v.reg & R_MGP) > 0x07)
+			rex |= REX_B;
+
+		modrm |= (arg2->v.reg & 0x07);
+	}
+
+	if(arg2->type == T_MEMORY)
+	{
+		if((arg2->v.mem.basetype == T_REGISTER && !(arg2->v.mem.base.reg & R_64))
+		|| (arg2->v.mem.indextype == T_REGISTER && !(arg2->v.mem.index.reg & R_64)))
+		{
+			crap("only 64bit base/index registers are %x %x", arg2->v.mem.base.reg, arg2->v.mem.index.reg);
+		}
+
+		if(arg2->v.mem.indextype == T_REGISTER)
+		{
+			modrm |= MODRM_RM_SIB;
+			if(!arg2->v.mem.disp)
+			{
+				modrm |= MODRM_MOD_00; // no displacement bytes follow
+			}
+			else if(iss8(arg2->v.mem.disp))
+			{
+				modrm |= MODRM_MOD_01; // one displacement byte follows
+			}
+			else if(isu32(arg2->v.mem.disp))
+			{
+				modrm |= MODRM_MOD_10; // four displacement bytes follow
+			}
+			else
+			{
+				crap("invalid displacement");
+			}
+
+			if(arg2->v.mem.basetype != T_REGISTER) // checked before base.reg is read below
+				crap("base must be register");
+
+			if((arg2->v.mem.index.reg & R_MGP) > 0x07)
+				rex |= REX_X;
+
+			if((arg2->v.mem.base.reg & R_MGP) > 0x07)
+				rex |= REX_B;
+			switch(arg2->v.mem.scale)
+			{
+				case 1: break;
+				case 2: sib |= 1 << 6; break;
+				case 4: sib |= 2 << 6; break;
+				case 8: sib |= 3 << 6; break;
+			}
+			sib |= (arg2->v.mem.index.reg & 0x07) << 3;
+			sib |= (arg2->v.mem.base.reg & 0x07);
+		}
+		else if(arg2->v.mem.indextype == T_NONE)
+		{
+			if(!arg2->v.mem.disp)
+			{
+				modrm |= MODRM_MOD_00;
+			}
+			else if(iss8(arg2->v.mem.disp))
+			{
+				modrm |= MODRM_MOD_01;
+			}
+			else if(isu32(arg2->v.mem.disp))
+			{
+				modrm |= MODRM_MOD_10;
+			}
+			else
+			{
+				crap("invalid displacement");
+			}
+			if(arg2->v.mem.basetype != T_REGISTER)
+				crap("todo: base != register");
+			if((arg2->v.mem.base.reg & R_MGP) > 0x07)
+				rex |= REX_B;
+
+			modrm |= arg2->v.mem.base.reg & 0x07;
+			if((modrm & 0x07) == MODRM_RM_SIB) // base with low bits 100b (rsp): callers will emit a SIB byte,
+				sib = (0x04 << 3) | (arg2->v.mem.base.reg & 0x07); // so make it say "no index, this base" instead of 0
+		}
+		else
+		{
+			crap("invalid indextype");
+		}
+	}
+	else
+	{
+		modrm |= MODRM_MOD_11;
+	}
+
+	if(rex)
+		rex |= 0x40; // XXX
+
+	*rex_r = rex;
+	*modrm_r = modrm;
+	*sib_r = sib;
+}
+
+/* Emit the displacement bytes of a memory operand: none for 0, one byte if it passes iss8, otherwise four bytes. */
+static void maybe_emit_displacement(arg_t* arg)
+{
+	/* non-memory operands carry no displacement, and a zero displacement is not emitted */
+	if(arg->type != T_MEMORY || !arg->v.mem.disp)
+		return;
+
+	if(iss8(arg->v.mem.disp))
+	{
+		emit1((u8)arg->v.mem.disp);
+		return;
+	}
+
+	if(!isu32(arg->v.mem.disp))
+	{
+		crap("invalid displacement");
+		return;
+	}
+
+	emit4(arg->v.mem.disp);
+}
+
+/* Single-byte instruction whose low three opcode bits select the register operand; data carries the base opcode. */
+static void emit_opreg(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 opcode;
+
+	if(!(arg1.type == T_REGISTER && arg2.type == T_NONE))
+		CRAP_INVALID_ARGS;
+
+	/* register numbers above 7 need a REX prefix carrying the extra bit */
+	if((arg1.v.reg & R_MGP) >= 0x08)
+		emit1(0x40 | REX_B);
+
+	opcode = (u8)((unsigned long) data) | (arg1.v.reg & 0x07);
+	emit1(opcode);
+}
+
+/* operator which operates on reg/mem; data points to an opparam_t giving rcode, rcode8 and the subcode placed in ModRM.reg */
+static void emit_op_rm(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex, modrm, sib;
+	opparam_t* params = data;
+	// v.reg is only meaningful for a register operand; a memory operand gets the default-size rcode form
+	reg_t reg = (arg1.type == T_REGISTER) ? arg1.v.reg : 0;
+
+	if((arg1.type != T_REGISTER && arg1.type != T_MEMORY) || arg2.type != T_NONE)
+		CRAP_INVALID_ARGS;
+
+	compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+	modrm |= params->subcode << 3;
+	if(reg & R_16)
+		emit1(0x66);
+
+	if(rex) emit1(rex);
+	if(reg & R_8)
+		emit1(params->rcode8); // op reg8/mem8,
+	else
+		emit1(params->rcode); // op reg/mem,
+	emit1(modrm);
+	if(arg1.type == T_MEMORY && (modrm & 0x07) == MODRM_RM_SIB) // for a register operand rm=100b names the register, no SIB
+		emit1(sib);
+
+	maybe_emit_displacement(&arg1);
+}
+
+/* operator which operates on a register with cl: arg1 must be %cl, arg2 is the register that is modified */
+static void emit_op_rm_cl(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex, modrm, sib;
+	opparam_t* params = data;
+
+	if(arg2.type != T_REGISTER || arg1.type != T_REGISTER)
+		CRAP_INVALID_ARGS;
+
+	// cl = register number of ecx AND 8 bit wide; with && only operands failing both tests were refused
+	if((arg1.v.reg & R_MGP) != R_ECX || !(arg1.v.reg & R_8))
+		crap("only cl register is valid");
+
+	arg1.type = T_NONE; // don't complain, we know it's cl anyways
+
+	compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+	modrm |= params->subcode << 3;
+
+	if(arg2.v.reg & R_16)
+		emit1(0x66);
+
+	if(rex) emit1(rex);
+	if(arg2.v.reg & R_8)
+		emit1(params->rcode8); // op reg8/mem8,
+	else
+		emit1(params->rcode); // op reg/mem,
+	emit1(modrm);
+	// arg2 is always a register here (ModRM mod=11), so neither a SIB byte
+	// nor a displacement can follow; rm=100b simply names the register and
+	// must not trigger the "SIB follows" path
+}
+
+/* mov in its imm->reg, imm->mem, reg->reg, reg->mem and mem->reg forms (source first, destination second); data is unused. */
+static void emit_mov(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex = 0;
+	u8 modrm = 0;
+	u8 sib = 0;
+
+	if(arg1.type == T_IMMEDIATE && arg2.type == T_REGISTER)
+	{
+		u8 op = 0xb8;
+		if(arg2.v.reg & R_8)
+		{
+			if(!isu8(arg1.v.imm))
+				crap("value too large for 8bit register");
+
+			op = 0xb0;
+		}
+		else if(arg2.v.reg & R_16)
+		{
+			if(!isu16(arg1.v.imm))
+				crap("value too large for 16bit register");
+			emit1(0x66);
+		}
+		else if(!(arg2.v.reg & R_64)) // was "!arg2.v.reg & R_64", which never ran this check
+		{
+			if(!isu32(arg1.v.imm) && !iss32(arg1.v.imm)) // any 32 bit pattern, written signed or unsigned
+				crap("value too large for 32bit register");
+		}
+
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+		if(rex) emit1(rex);
+
+		op |= (arg2.v.reg & 0x07);
+
+		emit1(op);
+
+		if(arg2.v.reg & R_8) emit1(arg1.v.imm);
+		else if(arg2.v.reg & R_16) emit2(arg1.v.imm);
+		else if(arg2.v.reg & R_64) emit8(arg1.v.imm);
+		else emit4(arg1.v.imm);
+	}
+	else if(arg1.type == T_IMMEDIATE && arg2.type == T_MEMORY)
+	{
+		if(!iss32(arg1.v.imm))
+		{
+			crap("only 32bit immediates supported");
+		}
+
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+		if(rex) emit1(rex);
+		emit1(0xc7); // mov reg/mem, imm
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+		maybe_emit_displacement(&arg2); // ModRM announces it, so it must precede the immediate; it was missing
+		emit4(arg1.v.imm);
+	}
+	else if(arg1.type == T_REGISTER && arg2.type == T_REGISTER)
+	{
+		if((arg1.v.reg & R_MSZ) != (arg2.v.reg & R_MSZ))
+			crap("both registers must be same width");
+
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+		// honour 8 and 16 bit registers the same way the reg -> mem form below does
+		if(arg1.v.reg & R_16)
+			emit1(0x66);
+		if(rex) emit1(rex);
+		emit1((arg1.v.reg & R_8) ? 0x88 : 0x89); // mov reg reg/mem,
+		emit1(modrm); // mod=11: no SIB byte and no displacement
+	}
+	else if(arg1.type == T_REGISTER && arg2.type == T_MEMORY)
+	{
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+		if(arg1.v.reg & R_16)
+			emit1(0x66);
+
+		if(rex) emit1(rex);
+		if(arg1.v.reg & R_8)
+			emit1(0x88); // mov reg reg/mem,
+		else
+			emit1(0x89); // mov reg reg/mem,
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+
+		maybe_emit_displacement(&arg2);
+	}
+	else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER)
+	{
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+		if(arg2.v.reg & R_16)
+			emit1(0x66);
+
+		if(rex) emit1(rex);
+		if(arg2.v.reg & R_8)
+			emit1(0x8a); // mov reg/mem, reg
+		else
+			emit1(0x8b); // mov reg/mem, reg
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+
+		maybe_emit_displacement(&arg1);
+	}
+	else
+		CRAP_INVALID_ARGS;
+}
+
+/* Two-operand ALU instruction; data points to an opparam_t supplying subcode (immediate form), rmcode and optionally mrcode. */
+static void emit_subaddand(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex = 0;
+	u8 modrm = 0;
+	u8 sib = 0;
+	opparam_t* params = data;
+
+	if(arg1.type == T_IMMEDIATE && arg2.type == T_REGISTER)
+	{
+		if(!iss32(arg1.v.imm))
+		{
+			crap("only 8 and 32 bit immediates supported");
+		}
+
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+		modrm |= params->subcode << 3; // opcode 0x81 is shared by all of them; the subcode in ModRM.reg picks the operation
+
+		if(rex) emit1(rex);
+#if 0
+		if(isu8(arg1.v.imm))
+		{
+			emit1(0x83); // sub reg/mem, imm8
+			emit1(modrm);
+			emit1(arg1.v.imm&0xFF);
+		}
+		else
+#endif
+		{
+			emit1(0x81); // sub reg/mem, imm32
+			emit1(modrm);
+			emit4(arg1.v.imm);
+		}
+	}
+	else if(arg1.type == T_REGISTER && (arg2.type == T_MEMORY || arg2.type == T_REGISTER))
+	{
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+		if(rex) emit1(rex);
+		emit1(params->rmcode); // sub reg/mem, reg
+		emit1(modrm);
+		if(arg2.type == T_MEMORY && (modrm & 0x07) == MODRM_RM_SIB) // only a memory operand can carry a SIB byte
+			emit1(sib);
+
+		maybe_emit_displacement(&arg2);
+	}
+	else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER && params->mrcode) // only for operations that define mrcode
+	{
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+		if(rex) emit1(rex);
+		emit1(params->mrcode); // sub reg, reg/mem
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+
+		maybe_emit_displacement(&arg1);
+	}
+	else
+		CRAP_INVALID_ARGS;
+}
+
+/* Emit a short conditional jump: the opcode byte taken from data, then a signed 8 bit displacement to the label. */
+static void emit_condjump(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	unsigned off;
+	int disp;
+	unsigned char opcode = (unsigned char)(((unsigned long)data)&0xFF);
+
+	if(arg1.type != T_LABEL || arg2.type != T_NONE)
+		crap("%s: argument must be label", mnemonic);
+
+	emit1(opcode);
+	off = lookup_label(arg1.v.label);
+	disp = off-(compiledOfs+1); // relative to the end of the instruction; +1 is the displacement byte itself
+	if(assembler_pass && (disp < -128 || disp > 127)) // rel8 spans -128..127; abs(disp) > 127 wrongly refused -128
+		crap("cannot jump that far (%x -> %x = %x)", compiledOfs, off, disp);
+
+	emit1(disp);
+}
+
+/* jmp to a label (0xe9 with a 32 bit relative displacement) or indirectly through a register/memory operand (0xff /4). */
+static void emit_jmp(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	if((arg1.type != T_LABEL && arg1.type != T_REGISTER && arg1.type != T_MEMORY) || arg2.type != T_NONE)
+		CRAP_INVALID_ARGS;
+	if(arg1.type == T_LABEL)
+	{
+		unsigned off;
+		int disp;
+
+		off = lookup_label(arg1.v.label);
+		disp = off-(compiledOfs+5); // relative to the end of the 5 byte instruction (opcode + 4 displacement bytes)
+		emit1(0xe9);
+		emit4(disp);
+	}
+	else
+	{
+		u8 rex, modrm, sib;
+
+		if(arg1.type == T_REGISTER)
+		{
+			if(!arg1.absolute) // the operand must have been written with a leading '*'
+				crap("jmp must be absolute");
+
+			if((arg1.v.reg & R_64) != R_64)
+				crap("register must be 64bit");
+
+			arg1.v.reg ^= R_64; // clear the 64 bit flag so compute_rexmodrmsib does not request REX.W
+		}
+
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); // arg2 is T_NONE here; arg1 goes into ModRM.rm
+
+		modrm |= 0x4 << 3; // subcode 4 in ModRM.reg
+
+		if(rex) emit1(rex);
+		emit1(0xff);
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+		maybe_emit_displacement(&arg1);
+	}
+}
+
+/* Indirect call through a 64 bit register written as "*%reg" (0xff /2); no other operand form is accepted. */
+static void emit_call(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex, modrm, sib;
+	if(arg1.type != T_REGISTER || arg2.type != T_NONE)
+		CRAP_INVALID_ARGS;
+
+	if(!arg1.absolute) // the operand must have been written with a leading '*'
+		crap("call must be absolute");
+
+	if((arg1.v.reg & R_64) != R_64)
+		crap("register must be 64bit");
+
+	arg1.v.reg ^= R_64; // no rex required for call
+
+	compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1); // arg2 is T_NONE here; arg1 goes into ModRM.rm
+
+	modrm |= 0x2 << 3; // subcode 2 in ModRM.reg
+
+	if(rex) emit1(rex);
+	emit1(0xff);
+	emit1(modrm);
+}
+
+
+/* 0x0f-prefixed instruction; data points to an opparam_t: optional xmmprefix, rmcode for "reg, reg/mem", mrcode for "mem, reg". */
+static void emit_twobyte(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex, modrm, sib;
+	opparam_t* params = data;
+
+	// an instruction without rmcode has no register-first form: refuse it rather than emit opcode 0x00
+	if(arg1.type == T_REGISTER && (arg2.type == T_MEMORY || arg2.type == T_REGISTER) && params->rmcode)
+	{
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+		if(params->xmmprefix) emit1(params->xmmprefix);
+		if(rex) emit1(rex);
+		emit1(0x0f);
+		emit1(params->rmcode); // op reg, reg/mem
+		emit1(modrm);
+		if(arg2.type == T_MEMORY && (modrm & 0x07) == MODRM_RM_SIB) // only a memory operand can carry a SIB byte
+			emit1(sib);
+
+		maybe_emit_displacement(&arg2);
+	}
+	else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER && params->mrcode)
+	{
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+		if(params->xmmprefix) emit1(params->xmmprefix);
+		if(rex) emit1(rex);
+		emit1(0x0f);
+		emit1(params->mrcode); // op mem, reg
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+
+		maybe_emit_displacement(&arg1);
+	}
+	else
+		CRAP_INVALID_ARGS;
+}
+
+static opparam_t params_add = { .subcode = 0, .rmcode = 0x01, }; // subcode: ModRM.reg extension used with the shared immediate opcode
+static opparam_t params_or = { .subcode = 1, .rmcode = 0x09, };
+static opparam_t params_and = { .subcode = 4, .rmcode = 0x21, };
+static opparam_t params_sub = { .subcode = 5, .rmcode = 0x29, };
+static opparam_t params_xor = { .subcode = 6, .rmcode = 0x31, };
+static opparam_t params_cmp = { .subcode = 7, .rmcode = 0x39, .mrcode = 0x3b, }; // cmp is /7; 6 is xor's extension and assembled "cmp $imm, reg" as xor
+static opparam_t params_dec = { .subcode = 1, .rcode = 0xff, .rcode8 = 0xfe, };
+static opparam_t params_sar = { .subcode = 7, .rcode = 0xd3, .rcode8 = 0xd2, };
+static opparam_t params_shl = { .subcode = 4, .rcode = 0xd3, .rcode8 = 0xd2, };
+static opparam_t params_shr = { .subcode = 5, .rcode = 0xd3, .rcode8 = 0xd2, };
+static opparam_t params_idiv = { .subcode = 7, .rcode = 0xf7, .rcode8 = 0xf6, };
+static opparam_t params_div = { .subcode = 6, .rcode = 0xf7, .rcode8 = 0xf6, };
+static opparam_t params_imul = { .subcode = 5, .rcode = 0xf7, .rcode8 = 0xf6, };
+static opparam_t params_mul = { .subcode = 4, .rcode = 0xf7, .rcode8 = 0xf6, };
+static opparam_t params_neg = { .subcode = 3, .rcode = 0xf7, .rcode8 = 0xf6, };
+static opparam_t params_not = { .subcode = 2, .rcode = 0xf7, .rcode8 = 0xf6, };
+
+static opparam_t params_cvtsi2ss = { .xmmprefix = 0xf3, .rmcode = 0x2a };
+static opparam_t params_cvttss2si = { .xmmprefix = 0xf3, .rmcode = 0x2c };
+static opparam_t params_addss = { .xmmprefix = 0xf3, .mrcode = 0x58 };
+static opparam_t params_divss = { .xmmprefix = 0xf3, .mrcode = 0x5e };
+static opparam_t params_movss = { .xmmprefix = 0xf3, .mrcode = 0x10, .rmcode = 0x11 };
+static opparam_t params_mulss = { .xmmprefix = 0xf3, .mrcode = 0x59 };
+static opparam_t params_subss = { .xmmprefix = 0xf3, .mrcode = 0x5c };
+static opparam_t params_ucomiss = { .mrcode = 0x2e };
+
+static int ops_sorted = 0; // set once assembler_init() has qsort()ed ops[]
+static op_t ops[] = { // mnemonic, emitter, emitter data (an opcode value or an opparam_t pointer); the order written here need not be sorted
+	{ "addl", emit_subaddand, &params_add },
+	{ "addq", emit_subaddand, &params_add },
+	{ "addss", emit_twobyte, &params_addss },
+	{ "andl", emit_subaddand, &params_and },
+	{ "andq", emit_subaddand, &params_and },
+	{ "callq", emit_call, NULL },
+	{ "cbw", emit_opsingle16, (void*)0x98 },
+	{ "cdq", emit_opsingle, (void*)0x99 },
+	{ "cmpl", emit_subaddand, &params_cmp },
+	{ "cmpq", emit_subaddand, &params_cmp },
+	{ "cvtsi2ss", emit_twobyte, &params_cvtsi2ss },
+	{ "cvttss2si", emit_twobyte, &params_cvttss2si },
+	{ "cwde", emit_opsingle, (void*)0x98 },
+	{ "decl", emit_op_rm, &params_dec },
+	{ "decq", emit_op_rm, &params_dec },
+	{ "divl", emit_op_rm, &params_div },
+	{ "divq", emit_op_rm, &params_div },
+	{ "divss", emit_twobyte, &params_divss },
+	{ "idivl", emit_op_rm, &params_idiv },
+	{ "imull", emit_op_rm, &params_imul },
+	{ "int3", emit_opsingle, (void*)0xcc },
+	{ "ja", emit_condjump, (void*)0x77 },
+	{ "jbe", emit_condjump, (void*)0x76 },
+	{ "jb", emit_condjump, (void*)0x72 },
+	{ "je", emit_condjump, (void*)0x74 },
+	{ "jl", emit_condjump, (void*)0x7c },
+	{ "jmp", emit_jmp, NULL },
+	{ "jmpq", emit_jmp, NULL },
+	{ "jnae", emit_condjump, (void*)0x72 },
+	{ "jna", emit_condjump, (void*)0x76 },
+	{ "jnbe", emit_condjump, (void*)0x77 },
+	{ "jnb", emit_condjump, (void*)0x73 },
+	{ "jnc", emit_condjump, (void*)0x73 },
+	{ "jne", emit_condjump, (void*)0x75 },
+	{ "jnge", emit_condjump, (void*)0x7c },
+	{ "jng", emit_condjump, (void*)0x7e },
+	{ "jnle", emit_condjump, (void*)0x7f },
+	{ "jnl", emit_condjump, (void*)0x7d },
+	{ "jnz", emit_condjump, (void*)0x75 },
+	{ "jp", emit_condjump, (void*)0x7a },
+	{ "jz", emit_condjump, (void*)0x74 },
+	{ "movb", emit_mov, NULL },
+	{ "movl", emit_mov, NULL },
+	{ "movq", emit_mov, NULL },
+	{ "movss", emit_twobyte, &params_movss },
+	{ "movw", emit_mov, NULL },
+	{ "mull", emit_op_rm, &params_mul },
+	{ "mulss", emit_twobyte, &params_mulss },
+	{ "negl", emit_op_rm, &params_neg },
+	{ "negq", emit_op_rm, &params_neg },
+	{ "nop", emit_opsingle, (void*)0x90 },
+	{ "notl", emit_op_rm, &params_not },
+	{ "notq", emit_op_rm, &params_not },
+	{ "or",   emit_subaddand, &params_or },
+	{ "orl",  emit_subaddand, &params_or },
+	{ "pop", emit_opreg, (void*)0x58 },
+	{ "push", emit_opreg, (void*)0x50 },
+	{ "ret", emit_opsingle, (void*)0xc3 },
+	{ "sarl", emit_op_rm_cl, &params_sar },
+	{ "shl", emit_op_rm_cl, &params_shl },
+	{ "shrl", emit_op_rm_cl, &params_shr },
+	{ "subl", emit_subaddand, &params_sub },
+	{ "subq", emit_subaddand, &params_sub },
+	{ "subss", emit_twobyte, &params_subss },
+	{ "ucomiss", emit_twobyte, &params_ucomiss },
+	{ "xorl",  emit_subaddand, &params_xor },
+	{ "xorq",  emit_subaddand, &params_xor },
+	{ NULL, NULL, NULL } // end marker; the qsort() call in assembler_init() leaves it out
+};
+
+static int opsort(const void* A, const void* B)
+{
+	const char* lhs = ((const op_t*)A)->mnemonic;
+	const char* rhs = ((const op_t*)B)->mnemonic;
+	return strcmp(lhs, rhs); // qsort() comparator: orders table entries by mnemonic
+}
+
+/*
+ * Look up the table entry for mnemonic n.
+ *
+ * Binary search over ops[], which must already be sorted by mnemonic
+ * (assembler_init() sorts it once). Returns NULL for an unknown mnemonic.
+ *
+ * The search interval is half-open, [lo, hi): hi starts at the index of the
+ * { NULL, NULL, NULL } end marker so that entry is never handed to strcmp(),
+ * and no index is ever decremented, so a name that sorts before ops[0]
+ * cannot wrap an unsigned index around.
+ */
+static op_t* getop(const char* n)
+{
+	size_t lo = 0;
+	size_t hi = sizeof(ops)/sizeof(ops[0]) - 1;
+
+	while(lo < hi)
+	{
+		size_t mid = lo + ((hi - lo) >> 1);
+		int r = strcmp(ops[mid].mnemonic, n);
+
+		if(r == 0)
+		{
+			return &ops[mid];
+		}
+		else if(r < 0)
+		{
+			lo = mid + 1;
+		}
+		else
+		{
+			hi = mid;
+		}
+	}
+
+	return NULL;
+}
+
+/* Map a register name (without '%') to its reg_t value; the whole name has to match. Unknown names are reported through crap(). */
+static reg_t parsereg(const char* str)
+{
+	const char* s = str;
+	if(*s == 'a' && s[1] == 'l' && !s[2])
+	{
+		return R_AL;
+	}
+	else if(*s == 'a' && s[1] == 'x' && !s[2])
+	{
+		return R_AX;
+	}
+	if(*s == 'c' && s[1] == 'l' && !s[2])
+	{
+		return R_CL;
+	}
+	if(*s == 'x')
+	{
+		if(!strcmp(s, "xmm0"))
+			return R_XMM0;
+	}
+	else if(*s == 'r' && s[1])
+	{
+		++s;
+		if(s[1] == 'x' && !s[2]) // !s[2]: names with trailing characters ("raxx") used to be accepted
+		{
+			switch(*s++)
+			{
+				case 'a': return R_RAX;
+				case 'b': return R_RBX;
+				case 'c': return R_RCX;
+				case 'd': return R_RDX;
+			}
+		}
+		else if(s[1] == 'i' && !s[2])
+		{
+			switch(*s++)
+			{
+				case 's': return R_RSI;
+				case 'd': return R_RDI;
+			}
+		}
+		else if(s[0] == 's' && s[1] == 'p' && !s[2])
+		{
+			return R_RSP;
+		}
+		else if(*s == '8' && !s[1])
+			return R_R8;
+		else if(*s == '9' && !s[1])
+			return R_R9;
+		else if(*s == '1' && s[1] == '0' && !s[2]) // "r10" only, not "r10d" or "r100"
+			return R_R10;
+		else if(*s == '1' && s[1] == '5' && !s[2])
+			return R_R15;
+	}
+	else if(*s == 'e' && s[1])
+	{
+		++s;
+		if(s[1] == 'x' && !s[2])
+		{
+			switch(*s++)
+			{
+				case 'a': return R_EAX;
+				case 'b': return R_EBX;
+				case 'c': return R_ECX;
+				case 'd': return R_EDX;
+			}
+		}
+		else if(s[1] == 'i' && !s[2]) // "edi"/"esi" only; "edit" used to parse as edi
+		{
+			switch(*s++)
+			{
+				case 's': return R_ESI;
+				case 'd': return R_EDI;
+			}
+		}
+	}
+
+	crap("invalid register %s", str);
+	return 0;
+}
+
+typedef enum { // token kinds returned by nexttok(); punctuation is returned as its own character value instead
+	TOK_LABEL = 0x80, // identifier starting with a lowercase letter
+	TOK_INT = 0x81, // integer literal
+	TOK_END = 0x82, // end of the input string
+	TOK_INVALID = 0x83, // returned after crap() reported unparsable input
+} token_t;
+
+/* Scan one token at *str: punctuation is returned as itself, otherwise a TOK_* value; label/val receive the text/number when non-NULL. */
+static unsigned char nexttok(const char** str, char* label, u64* val)
+{
+	const char* s = *str;
+	if(label) *label = 0;
+	if(val) *val = 0;
+
+	while(*s && *s == ' ') ++s;
+
+	if(!*s)
+	{
+		return TOK_END; // *str is left untouched in this case
+	}
+	else if(*s == '$' || *s == '*' || *s == '%' || *s == '-' || *s == ')' || *s == '(' || *s == ',')
+	{
+		*str = s+1;
+		return *s;
+	}
+	else if(*s >= 'a' && *s <= 'z')
+	{
+		size_t a = strspn(s+1, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_");
+		if(a+1 >= LABELLEN) // a+1 characters plus the terminator must fit into LABELLEN bytes
+			crap("label %s too long", s);
+		if(label)
+		{
+			strncpy(label, s, a+1);
+			label[a+1] = 0;
+		}
+		*str = s+a+1;
+		return TOK_LABEL;
+	}
+	else if(*s >= '0' && *s <= '9')
+	{
+		char* endptr = NULL;
+		u64 v = strtoull(s, &endptr, 0); // strtol() cut 64 bit immediates down to long, which is 32 bit on LLP64
+		if(endptr && (endptr-s == 0))
+			crap("invalid integer %s", s);
+		if(val) *val = v;
+		*str = endptr;
+		return TOK_INT;
+	}
+	crap("can't parse '%s'", *str);
+	return TOK_INVALID;
+}
+
+/* Parse one operand at *str ($imm, %reg, *%reg, label, or [-]disp(%base[,%index,scale])) and advance *str past it. */
+static arg_t parsearg(const char** str)
+{
+	arg_t arg;
+	const char* s = *str;
+	char label[LABELLEN]; // nexttok() bounds its copy by LABELLEN, so the buffer must be that large (was 20)
+	u64 val;
+	int negative = 1;
+	unsigned ttype;
+	arg.type = T_NONE;
+	arg.absolute = 0;
+
+	while(*s && *s == ' ') ++s;
+
+	switch(nexttok(&s, label, &val))
+	{
+		case '$' :
+			ttype = nexttok(&s, NULL, &val);
+			if(ttype == '-')
+			{
+				negative = -1;
+				ttype = nexttok(&s, NULL, &val);
+			}
+			if(ttype != TOK_INT)
+				crap("expected integer");
+			arg.type = T_IMMEDIATE;
+			arg.v.imm = negative * val;
+			break;
+		case '*' :
+			if((ttype = nexttok(&s, NULL, NULL)) != '%')
+			{
+				if(ttype == '(')
+					goto tok_memory;
+				crap("expected '%%'");
+			}
+			arg.absolute = 1;
+			/* fall through */
+		case '%' :
+			if(nexttok(&s, label, &val) != TOK_LABEL)
+				crap("expected label");
+			arg.type = T_REGISTER;
+			arg.v.reg = parsereg(label);
+			break;
+		case TOK_LABEL:
+			arg.type = T_LABEL;
+			strncpy(arg.v.label, label, LABELLEN);
+			break;
+		case '-':
+			negative = -1;
+			if(nexttok(&s, NULL, &val) != TOK_INT)
+				crap("expected integer");
+			/* fall through */
+		case TOK_INT:
+			if(nexttok(&s, label, NULL) != '(')
+				crap("expected '('"); // mov to/from fixed address not supported
+			/* fall through */
+		case '(':
+tok_memory:
+			arg.type = T_MEMORY;
+			arg.v.mem.basetype = arg.v.mem.indextype = T_NONE; // basetype was left uninitialized when no base followed
+			arg.v.mem.disp = negative * val;
+			ttype = nexttok(&s, label, &val);
+			if(ttype == '%' && nexttok(&s, label, &val) != TOK_LABEL)
+			{
+				crap("expected register");
+			}
+			if (ttype == '%')
+			{
+				arg.v.mem.basetype = T_REGISTER;
+				arg.v.mem.base.reg = parsereg(label);
+			}
+			else if (ttype == TOK_INT)
+			{
+				arg.v.mem.basetype = T_IMMEDIATE;
+				arg.v.mem.base.imm = val;
+			}
+			if((ttype = nexttok(&s, NULL, NULL)) == ',')
+			{
+				ttype = nexttok(&s, label, &val);
+				if(ttype == '%' && nexttok(&s, label, &val) != TOK_LABEL)
+				{
+					crap("expected register");
+				}
+				if (ttype == '%')
+				{
+					arg.v.mem.indextype = T_REGISTER;
+					arg.v.mem.index.reg = parsereg(label);
+				}
+				else if (ttype == TOK_INT)
+				{
+					crap("index must be register");
+					arg.v.mem.indextype = T_IMMEDIATE;
+					arg.v.mem.index.imm = val;
+				}
+				if(nexttok(&s, NULL, NULL) != ',')
+					crap("expected ','");
+				if(nexttok(&s, NULL, &val) != TOK_INT)
+					crap("expected integer");
+				if(val != 1 && val != 2 && val != 4 && val != 8)
+					crap("scale must 1, 2, 4 or 8");
+				arg.v.mem.scale = val;
+
+				ttype = nexttok(&s, NULL, NULL);
+			}
+			if(ttype != ')')
+			{
+				crap("expected ')' or ','");
+			}
+			break;
+		default:
+			crap("invalid token %hhu in %s", *(unsigned char*)s, *str);
+			break;
+	}
+
+	*str = s;
+
+	return arg;
+}
+
+/* ************************* */
+
+/* Reset the assembler for the given pass; pass 0 also discards all labels, and the opcode table is sorted on the first call. */
+void assembler_init(int pass)
+{
+	compiledOfs = 0;
+	assembler_pass = pass;
+	if(pass == 0)
+	{
+		labelhash_free();
+		cur_line = NULL;
+	}
+	if(ops_sorted)
+		return;
+	ops_sorted = 1;
+	qsort(ops, sizeof(ops)/sizeof(ops[0])-1, sizeof(ops[0]), opsort);
+}
+
+size_t assembler_get_code_size(void)
+{
+	return compiledOfs; // current output offset; assembler_init() resets it to 0
+}
+
+void assembler_set_output(char* buf)
+{
+	out = buf; // presumably the buffer the emit helpers write to; they are defined outside this view
+}
+
+/* Assemble one source line of len bytes: "name:" defines a label (recorded in pass 0), anything else is "mnemonic [arg1[,arg2]]" dispatched via ops[]. */
+void assemble_line(const char* input, size_t len)
+{
+	char line[4096];
+	char* s;
+	op_t* o;
+	char* opn;
+	arg_t arg1, arg2;
+
+	arg1.type = T_NONE;
+	arg2.type = T_NONE;
+
+	if(len < 1)
+		return;
+	if(len >= sizeof(line))
+	{
+		crap("line too long");
+		return;
+	}
+
+	memcpy(line, input, len); // only len bytes belong to the caller; copying sizeof(line) read past shorter inputs
+	line[len] = 0;
+	cur_line = input;
+
+	if(line[len-1] == '\n') line[--len] = 0;
+	if(!len)
+		return; // nothing but a newline: line[len-1] below would index line[-1]
+	if(line[len-1] == ':')
+	{
+		line[--len] = 0;
+		if(assembler_pass)
+			debug("%s: 0x%x\n", line, compiledOfs);
+		else
+			hash_add_label(line, compiledOfs);
+	}
+	else
+	{
+		opn = line;
+		s = strchr(line, ' ');
+		if(s)
+		{
+			*s++ = 0;
+			arg1 = parsearg((const char**)&s);
+			if(*s)
+			{
+				if(*s != ',')
+					crap("expected ',', got '%c'", *s);
+				++s;
+				arg2 = parsearg((const char**)&s);
+			}
+		}
+
+		o = getop(opn);
+		if(!o)
+		{
+			crap("cannot handle op %s", opn);
+			return; // never call through a NULL entry, even if crap() returns
+		}
+		o->func(opn, arg1, arg2, o->data);
+		if(assembler_pass)
+			debug("   - %s%s", cur_line, cur_line[strlen(cur_line)-1]=='\n'?"":"\n");
+	}
+}
+
+#ifdef SA_STANDALONE
+/* Stand-alone driver: assembles the file named by argv[1] in two passes; argv[2], if given, is opened for writing as fout. */
+int main(int argc, char* argv[])
+{
+	char line[4096];
+	size_t len;
+	int pass;
+	FILE* file = NULL;
+	if(argc < 2)
+	{
+		crap("specify file");
+	}
+
+	file = fopen(argv[1], "r");
+	if(!file)
+	{
+		crap("can't open file");
+	}
+
+	if(argc > 2)
+	{
+		fout = fopen(argv[2], "w");
+		if(!fout)
+		{
+			crap("can't open %s for writing", argv[2]);
+		}
+	}
+
+	for(pass = 0; pass < 2; ++pass) // pass 0 records labels and the code size, pass 1 assembles with labels known
+	{
+		if(fseek(file, 0, SEEK_SET))
+			crap("can't rewind file");
+
+		if(pass)
+		{
+			char* b = malloc(assembler_get_code_size()); // sized from pass 0, read before assembler_init() resets the offset
+			if(!b)
+				crap("cannot allocate memory");
+			assembler_set_output(b); // NOTE(review): b is never freed in this function
+		}
+
+		assembler_init(pass);
+
+		while(fgets(line, sizeof(line), file))
+		{
+			len = strlen(line);
+			if(!len) continue;
+
+			assemble_line(line, len);
+		}
+	}
+
+	assembler_init(0); // pass 0 initialisation calls labelhash_free(), releasing the labels
+
+	fclose(file);
+
+	return 0;
+}
+#endif
diff --git a/src/renderer/tr_image.c b/src/renderer/tr_image.c
index bd51ff2d..803f86e2 100644
--- a/src/renderer/tr_image.c
+++ b/src/renderer/tr_image.c
@@ -34,10 +34,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #define JPEG_INTERNALS
 #include "../jpeg-6/jpeglib.h"
 
+#include "../qcommon/puff.h"
+
 
 static void LoadBMP( const char *name, byte **pic, int *width, int *height );
 static void LoadTGA( const char *name, byte **pic, int *width, int *height );
 static void LoadJPG( const char *name, byte **pic, int *width, int *height );
+static void LoadPNG( const char *name, byte **pic, int *width, int *height );
 
 static byte			 s_intensitytable[256];
 static unsigned char s_gammatable[256];
@@ -1932,6 +1935,2450 @@ int SaveJPGToBuffer( byte *buffer, int quality,
 
 /*
 =================
+PNG LOADING
+=================
+*/
+
+/*
+ *  Quake 3 image format : RGBA
+ */
+
+#define Q3IMAGE_BYTESPERPIXEL (4)
+
+/*
+ *  PNG specifications
+ */
+
+/*
+ *  The first 8 Bytes of every PNG-File are a fixed signature
+ *  to identify the file as a PNG.
+ */
+
+#define PNG_Signature "\x89\x50\x4E\x47\xD\xA\x1A\xA"
+#define PNG_Signature_Size (8)
+
+/*
+ *  After the signature, a series of chunks follows.
+ *  A chunk consists of a header and, if Length
+ *  is bigger than 0, a body followed by a CRC of the body.
+ */
+
+struct PNG_ChunkHeader
+{
+    uint32_t Length;
+    uint32_t Type;
+};
+
+#define PNG_ChunkHeader_Size (8)
+
+typedef uint32_t PNG_ChunkCRC;
+
+#define PNG_ChunkCRC_Size (4)
+
+/*
+ *  We use the following ChunkTypes.
+ *  All others are ignored.
+ */
+
+#define MAKE_CHUNKTYPE(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | ((d)))
+
+#define PNG_ChunkType_IHDR MAKE_CHUNKTYPE('I', 'H', 'D', 'R')
+#define PNG_ChunkType_PLTE MAKE_CHUNKTYPE('P', 'L', 'T', 'E')
+#define PNG_ChunkType_IDAT MAKE_CHUNKTYPE('I', 'D', 'A', 'T')
+#define PNG_ChunkType_IEND MAKE_CHUNKTYPE('I', 'E', 'N', 'D')
+#define PNG_ChunkType_tRNS MAKE_CHUNKTYPE('t', 'R', 'N', 'S')
+
+/*
+ *  Per specification the first chunk after the signature SHALL be IHDR.
+ */
+
+struct PNG_Chunk_IHDR
+{
+    uint32_t Width;
+    uint32_t Height;
+    uint8_t  BitDepth;
+    uint8_t  ColourType;
+    uint8_t  CompressionMethod;
+    uint8_t  FilterMethod;
+    uint8_t  InterlaceMethod;
+};
+
+#define PNG_Chunk_IHDR_Size (13)
+
+/*
+ *  ColourTypes
+ */
+
+#define PNG_ColourType_Grey      (0)
+#define PNG_ColourType_True      (2)
+#define PNG_ColourType_Indexed   (3)
+#define PNG_ColourType_GreyAlpha (4)
+#define PNG_ColourType_TrueAlpha (6)
+
+/*
+ *  number of colour components
+ *
+ *  Grey      : 1 grey
+ *  True      : 1 R, 1 G, 1 B
+ *  Indexed   : 1 index
+ *  GreyAlpha : 1 grey, 1 alpha
+ *  TrueAlpha : 1 R, 1 G, 1 B, 1 alpha
+ */
+
+#define PNG_NumColourComponents_Grey      (1)
+#define PNG_NumColourComponents_True      (3)
+#define PNG_NumColourComponents_Indexed   (1)
+#define PNG_NumColourComponents_GreyAlpha (2)
+#define PNG_NumColourComponents_TrueAlpha (4)
+
+/*
+ *  For the different ColourTypes
+ *  different BitDepths are specified.
+ */
+
+#define PNG_BitDepth_1  ( 1)
+#define PNG_BitDepth_2  ( 2)
+#define PNG_BitDepth_4  ( 4)
+#define PNG_BitDepth_8  ( 8)
+#define PNG_BitDepth_16 (16)
+
+/*
+ *  Only one valid CompressionMethod is standardized.
+ */
+
+#define PNG_CompressionMethod_0 (0)
+
+/*
+ *  Only one valid FilterMethod is currently standardized.
+ */
+
+#define PNG_FilterMethod_0 (0)
+
+/*
+ *  This FilterMethod defines 5 FilterTypes
+ */
+
+#define PNG_FilterType_None    (0)
+#define PNG_FilterType_Sub     (1)
+#define PNG_FilterType_Up      (2)
+#define PNG_FilterType_Average (3)
+#define PNG_FilterType_Paeth   (4)
+
+/*
+ *  Two InterlaceMethods are standardized :
+ *  0 - NonInterlaced
+ *  1 - Interlaced
+ */
+
+#define PNG_InterlaceMethod_NonInterlaced (0)
+#define PNG_InterlaceMethod_Interlaced    (1)
+
+/*
+ *  The Adam7 interlace method uses 7 passes.
+ */
+
+#define PNG_Adam7_NumPasses (7)
+
+/*
+ *  The compressed data starts with a header ...
+ */
+
+struct PNG_ZlibHeader
+{
+    uint8_t CompressionMethod;
+    uint8_t Flags;
+};
+
+#define PNG_ZlibHeader_Size (2)
+
+/*
+ *  ... and is followed by a check value
+ */
+
+#define PNG_ZlibCheckValue_Size (4)
+
+/*
+ *  Some support functions for buffered files follow.
+ */
+
+/*
+ *  buffered file representation
+ */
+
+struct BufferedFile
+{
+    byte *Buffer;
+    int   Length;
+    byte *Ptr;
+    int   BytesLeft;
+};
+
+/*
+ *  Read a file into a buffer.
+ */
+
+static struct BufferedFile *ReadBufferedFile(const char *name)
+{
+    struct BufferedFile *BF;
+
+    /*
+     *  input verification
+     */
+
+    if(!name)
+    {
+        return(NULL);
+    }
+
+    /*
+     *  Allocate control struct.
+     */
+
+    BF = ri.Malloc(sizeof(struct BufferedFile));
+    if(!BF)
+    {
+        return(NULL);
+    }
+
+    BF->Length    = 0;
+    BF->Buffer    = NULL;
+    BF->Ptr       = NULL;
+    BF->BytesLeft = 0;
+
+    /*
+     *  Read the file.
+     */
+
+    BF->Length = ri.FS_ReadFile((char *) name, (void **) &BF->Buffer);
+
+    /*
+     *  Did we get it? Is it big enough?
+     *  An empty file may still come with a buffer; release that one too.
+     */
+
+    if(!(BF->Buffer && (BF->Length > 0)))
+    {
+        if(BF->Buffer)
+        {
+            ri.FS_FreeFile(BF->Buffer);
+        }
+        ri.Free(BF);
+        return(NULL);
+    }
+
+    /*
+     *  Set the pointers and counters.
+     */
+
+    BF->Ptr       = BF->Buffer;
+    BF->BytesLeft = BF->Length;
+
+    return(BF);
+}
+
+/*
+ *  Close a buffered file.
+ */
+
+static void CloseBufferedFile(struct BufferedFile *BF)
+{
+    /* nothing to release for a NULL handle */
+    if(!BF)
+        return;
+
+    /* release the file contents (if any) before the control struct that points to them */
+    if(BF->Buffer)
+        ri.FS_FreeFile(BF->Buffer);
+
+    ri.Free(BF);
+}
+
+/*
+ *  Get a pointer to the requested bytes.
+ */
+
+static void *BufferedFileRead(struct BufferedFile *BF, int Length)
+{
+    void *RetVal;
+
+    /*
+     *  input verification (a negative Length would pass the BytesLeft
+     *  test below and move the pointer backwards)
+     */
+
+    if(!BF || (Length <= 0))
+    {
+        return(NULL);
+    }
+
+    /*
+     *  not enough bytes left
+     */
+
+    if(Length > BF->BytesLeft)
+    {
+        return(NULL);
+    }
+
+    /*
+     *  the pointer to the requested data
+     */
+
+    RetVal = BF->Ptr;
+
+    /*
+     *  Raise the pointer and lower the counter.
+     */
+
+    BF->Ptr       += Length;
+    BF->BytesLeft -= Length;
+    return(RetVal);
+}
+
+/*
+ *  Rewind the buffer.
+ */
+
+static qboolean BufferedFileRewind(struct BufferedFile *BF, int Offset)
+{
+    int BytesRead;
+
+    /*
+     *  input verification
+     */
+
+    if(!BF)
+    {
+        return(qfalse);
+    }
+
+    /*
+     *  special trick to rewind to the beginning of the buffer
+     */
+
+    if(Offset == -1)
+    {
+        BF->Ptr       = BF->Buffer;
+        BF->BytesLeft = BF->Length;
+
+        return(qtrue);
+    }
+
+    /*
+     *  How many bytes do we have already read?
+     */
+
+    BytesRead = BF->Ptr - BF->Buffer;
+
+    /*
+     *  We can only rewind to the beginning of the BufferedFile; any other negative Offset would move forward unchecked.
+     */
+
+    if((Offset < 0) || (Offset > BytesRead))
+    {
+        return(qfalse);
+    }
+
+    /*
+     *  Lower the pointer and raise the counter.
+     */
+
+    BF->Ptr       -= Offset;
+    BF->BytesLeft += Offset;
+
+    return(qtrue);
+}
+
+/*
+ *  Skip some bytes.
+ *
+ *  Returns qfalse, leaving the position unchanged, when BF is NULL, when
+ *  Offset is negative, or when fewer than Offset bytes are left.
+ */
+
+static qboolean BufferedFileSkip(struct BufferedFile *BF, int Offset)
+{
+    /*
+     *  input verification
+     */
+
+    if(!BF)
+    {
+        return(qfalse);
+    }
+
+    /*
+     *  We can only skip forward, and only to the end of the BufferedFile.
+     *  (An oversized 32-bit length can arrive here as a negative int.)
+     */
+
+    if((Offset < 0) || (Offset > BF->BytesLeft))
+    {
+        return(qfalse);
+    }
+
+    BF->Ptr       += Offset;
+    BF->BytesLeft -= Offset;
+
+    return(qtrue);
+}
+
+/*
+ *  Find a chunk
+ *
+ *  Walks the chunks from the current read position. When one whose type
+ *  equals ChunkType is found, the position is rewound to the start of
+ *  its header and qtrue is returned; otherwise qfalse.
+ */
+
+static qboolean FindChunk(struct BufferedFile *BF, uint32_t ChunkType)
+{
+    struct PNG_ChunkHeader *CH;
+
+    uint32_t Length;
+    uint32_t Type;
+
+    /*
+     *  input verification
+     */
+
+    if(!BF)
+    {
+        return(qfalse);
+    }
+
+    /*
+     *  cycle through the chunks
+     */
+
+    while(qtrue)
+    {
+        /*
+         *  Read the chunk-header.
+         */
+
+        CH = BufferedFileRead(BF, PNG_ChunkHeader_Size);
+        if(!CH)
+        {
+            return(qfalse);
+        }
+
+        /*
+         *  Do not swap the original types
+         *  they might be needed later.
+         */
+
+        Length = BigLong(CH->Length);
+        Type   = BigLong(CH->Type);
+
+        /*
+         *  We found it! Rewind to the start of the chunk.
+         */
+
+        if(Type == ChunkType)
+        {
+            BufferedFileRewind(BF, PNG_ChunkHeader_Size);
+            break;
+        }
+
+        /*
+         *  Skip the rest of the chunk. The CRC is there even when the
+         *  chunk carries no data, so it is skipped unconditionally. A
+         *  length that does not fit the int offset is rejected up front.
+         */
+
+        if(Length > (0x7FFFFFFFu - PNG_ChunkCRC_Size))
+        {
+            return(qfalse);
+        }
+        if(!BufferedFileSkip(BF, Length + PNG_ChunkCRC_Size))
+        {
+            return(qfalse);
+        }
+    }
+
+    return(qtrue);
+}
+
+/*
+ *  Decompress all IDATs
+ *
+ *  Finds the first IDAT chunk after the current read position, joins the
+ *  data of the unbroken run of IDAT chunks that starts there and inflates
+ *  it with puff(). On success *Buffer points to the decompressed data
+ *  (allocated with ri.Malloc and not freed here) and its length is
+ *  returned. On failure the result is (uint32_t) -1 and *Buffer, when
+ *  Buffer is valid, is NULL.
+ */
+
+static uint32_t DecompressIDATs(struct BufferedFile *BF, uint8_t **Buffer)
+{
+    uint8_t  *DecompressedData;
+    uint32_t  DecompressedDataLength;
+
+    uint8_t  *CompressedData;
+    uint8_t  *CompressedDataPtr;
+    uint32_t  CompressedDataLength;
+
+    struct PNG_ChunkHeader *CH;
+
+    uint32_t Length;
+    uint32_t Type;
+
+    int BytesToRewind;
+
+    int32_t   puffResult;
+    uint8_t  *puffDest;
+    uint32_t  puffDestLen;
+    uint8_t  *puffSrc;
+    uint32_t  puffSrcLen;
+
+    /*
+     *  input verification
+     */
+
+    if(!(BF && Buffer))
+    {
+        return(-1);
+    }
+
+    /*
+     *  some zeroing
+     */
+
+    DecompressedData = NULL;
+    DecompressedDataLength = 0;
+    *Buffer = DecompressedData;
+
+    CompressedData = NULL;
+    CompressedDataLength = 0;
+
+    BytesToRewind = 0;
+
+    /*
+     *  Find the first IDAT chunk.
+     */
+
+    if(!FindChunk(BF, PNG_ChunkType_IDAT))
+    {
+        return(-1);
+    }
+
+    /*
+     *  First pass: count the size of the compressed data
+     */
+
+    while(qtrue)
+    {
+        CH = BufferedFileRead(BF, PNG_ChunkHeader_Size);
+        if(!CH)
+        {
+            /*
+             *  Rewind to the start of this adventure
+             *  and return unsuccessful
+             */
+
+            BufferedFileRewind(BF, BytesToRewind);
+
+            return(-1);
+        }
+
+        Length = BigLong(CH->Length);
+        Type   = BigLong(CH->Type);
+
+        /*
+         *  We have reached the end of the IDAT chunks
+         */
+
+        if(!(Type == PNG_ChunkType_IDAT))
+        {
+            BufferedFileRewind(BF, PNG_ChunkHeader_Size);
+
+            break;
+        }
+
+        /*
+         *  Add chunk header to count.
+         */
+
+        BytesToRewind += PNG_ChunkHeader_Size;
+
+        /*
+         *  Skip to the next chunk. The CRC follows even an empty chunk,
+         *  and a length too large for the int offsets is malformed.
+         */
+
+        if((Length > (0x7FFFFFFFu - PNG_ChunkCRC_Size)) ||
+           !BufferedFileSkip(BF, Length + PNG_ChunkCRC_Size))
+        {
+            BufferedFileRewind(BF, BytesToRewind);
+
+            return(-1);
+        }
+
+        BytesToRewind += Length + PNG_ChunkCRC_Size;
+        CompressedDataLength += Length;
+    }
+
+    BufferedFileRewind(BF, BytesToRewind);
+
+    /*
+     *  The stream must hold more than the zlib header and check value,
+     *  otherwise the length handed to puff() below would wrap around.
+     */
+
+    if(CompressedDataLength <= (PNG_ZlibHeader_Size + PNG_ZlibCheckValue_Size))
+    {
+        return(-1);
+    }
+
+    CompressedData = ri.Malloc(CompressedDataLength);
+    if(!CompressedData)
+    {
+        return(-1);
+    }
+
+    CompressedDataPtr = CompressedData;
+
+    /*
+     *  Second pass: collect the compressed Data
+     */
+
+    while(qtrue)
+    {
+        CH = BufferedFileRead(BF, PNG_ChunkHeader_Size);
+        if(!CH)
+        {
+            ri.Free(CompressedData);
+
+            return(-1);
+        }
+
+        Length = BigLong(CH->Length);
+        Type   = BigLong(CH->Type);
+
+        /*
+         *  We have reached the end of the IDAT chunks
+         */
+
+        if(!(Type == PNG_ChunkType_IDAT))
+        {
+            BufferedFileRewind(BF, PNG_ChunkHeader_Size);
+
+            break;
+        }
+
+        /*
+         *  Copy the Data (an empty chunk has none) ...
+         */
+
+        if(Length)
+        {
+            uint8_t *OrigCompressedData;
+
+            OrigCompressedData = BufferedFileRead(BF, Length);
+            if(!OrigCompressedData)
+            {
+                ri.Free(CompressedData);
+
+                return(-1);
+            }
+
+            memcpy(CompressedDataPtr, OrigCompressedData, Length);
+            CompressedDataPtr += Length;
+        }
+
+        /*
+         *  ... and step over the CRC, which every chunk carries.
+         */
+
+        if(!BufferedFileSkip(BF, PNG_ChunkCRC_Size))
+        {
+            ri.Free(CompressedData);
+
+            return(-1);
+        }
+    }
+
+    /*
+     *  Let puff() calculate the decompressed data length.
+     */
+
+    puffDest    = NULL;
+    puffDestLen = 0;
+
+    /*
+     *  The zlib header and checkvalue don't belong to the compressed data.
+     */
+
+    puffSrc    = CompressedData + PNG_ZlibHeader_Size;
+    puffSrcLen = CompressedDataLength - PNG_ZlibHeader_Size - PNG_ZlibCheckValue_Size;
+
+    /*
+     *  first puff() to calculate the size of the uncompressed data
+     */
+
+    puffResult = puff(puffDest, &puffDestLen, puffSrc, &puffSrcLen);
+    if(!((puffResult == 0) && (puffDestLen > 0)))
+    {
+        ri.Free(CompressedData);
+
+        return(-1);
+    }
+
+    /*
+     *  Allocate the buffer for the uncompressed data.
+     */
+
+    DecompressedData = ri.Malloc(puffDestLen);
+    if(!DecompressedData)
+    {
+        ri.Free(CompressedData);
+
+        return(-1);
+    }
+
+    /*
+     *  Set the input again in case the last puff() changed it, then decompress.
+     */
+
+    puffDest   = DecompressedData;
+    puffSrc    = CompressedData + PNG_ZlibHeader_Size;
+    puffSrcLen = CompressedDataLength - PNG_ZlibHeader_Size - PNG_ZlibCheckValue_Size;
+
+    puffResult = puff(puffDest, &puffDestLen, puffSrc, &puffSrcLen);
+
+    /*
+     *  The compressed data is not needed anymore.
+     */
+
+    ri.Free(CompressedData);
+
+    /*
+     *  Check if the last puff() was successful.
+     */
+
+    if(!((puffResult == 0) && (puffDestLen > 0)))
+    {
+        ri.Free(DecompressedData);
+
+        return(-1);
+    }
+
+    /*
+     *  Set the output of this function.
+     */
+
+    DecompressedDataLength = puffDestLen;
+    *Buffer = DecompressedData;
+
+    return(DecompressedDataLength);
+}
+
+/*
+ *  the Paeth predictor
+ *
+ *  Picks whichever of the three neighbours is closest to the initial
+ *  estimate Left + Up - UpLeft. Ties are resolved in the order
+ *  Left, Up, UpLeft.
+ *
+ *  All arithmetic is done in int, so the estimate may leave 0..255.
+ */
+
+static uint8_t PredictPaeth(uint8_t a, uint8_t b, uint8_t c)
+{
+    /*
+     *  a == Left
+     *  b == Up
+     *  c == UpLeft
+     */
+
+    const int Left   = a;
+    const int Up     = b;
+    const int UpLeft = c;
+
+    const int Estimate   = Left + Up - UpLeft;
+    const int DistLeft   = abs(Estimate - Left);
+    const int DistUp     = abs(Estimate - Up);
+    const int DistUpLeft = abs(Estimate - UpLeft);
+
+    if((DistLeft <= DistUp) && (DistLeft <= DistUpLeft))
+    {
+        return(a);
+    }
+
+    if(DistUp <= DistUpLeft)
+    {
+        return(b);
+    }
+
+    return(c);
+}
+
+/*
+ *  Reverse the filters.
+ *
+ *  DecompressedData holds ImageHeight scanlines, each consisting of one
+ *  filter-type byte followed by BytesPerScanline data bytes. The data
+ *  bytes are reconstructed in place. Returns qfalse for bad arguments
+ *  or an unknown filter type, qtrue otherwise (including when there is
+ *  nothing to do).
+ */
+
+static qboolean UnfilterImage(uint8_t  *DecompressedData,
+                              uint32_t  ImageHeight,
+                              uint32_t  BytesPerScanline,
+                              uint32_t  BytesPerPixel)
+{
+    uint8_t   *DecompPtr;
+    uint8_t   FilterType;
+    uint8_t  *PixelLeft, *PixelUp, *PixelUpLeft;
+    uint32_t  w, h, p;
+
+    /*
+     *  some zeros for the filters
+     */
+
+    uint8_t Zeros[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+
+    /*
+     *  input verification
+     *
+     *  BytesPerPixel bytes are read from Zeros, so it must fit in there.
+     */
+
+    if(!(DecompressedData && BytesPerPixel && (BytesPerPixel <= sizeof(Zeros))))
+    {
+        return(qfalse);
+    }
+
+    /*
+     *  ImageHeight and BytesPerScanline can be zero in some interlace
+     *  passes. The interlaced decoder reserves no bytes for such a pass,
+     *  so not even a filter-type byte may be read for it.
+     */
+
+    if(!(ImageHeight && BytesPerScanline))
+    {
+        return(qtrue);
+    }
+
+    /*
+     *  Un-filtering is done in place, starting at the first scanline.
+     */
+
+    DecompPtr = DecompressedData;
+
+    /*
+     *  Go through all scanlines.
+     */
+
+    for(h = 0; h < ImageHeight; h++)
+    {
+        /*
+         *  Every scanline starts with a FilterType byte.
+         */
+
+        FilterType = *DecompPtr;
+        DecompPtr++;
+
+        /*
+         *  Left pixel of the first byte in a scanline is zero.
+         */
+
+        PixelLeft = Zeros;
+
+        /*
+         *  Set PixelUp to previous line only if we are on the second line or above.
+         *
+         *  Plus one byte for the FilterType
+         */
+
+        if(h > 0)
+        {
+            PixelUp = DecompPtr - (BytesPerScanline + 1);
+        }
+        else
+        {
+            PixelUp = Zeros;
+        }
+
+        /*
+         * The pixel left to the first pixel of the previous scanline is zero too.
+         */
+
+        PixelUpLeft = Zeros;
+
+        /*
+         *  Cycle through all pixels of the scanline.
+         */
+
+        for(w = 0; w < (BytesPerScanline / BytesPerPixel); w++)
+        {
+            /*
+             *  Cycle through the bytes of the pixel.
+             */
+
+            for(p = 0; p < BytesPerPixel; p++)
+            {
+                switch(FilterType)
+                {
+                    case PNG_FilterType_None :
+                    {
+                        /*
+                         *  The byte is unfiltered.
+                         */
+
+                        break;
+                    }
+
+                    case PNG_FilterType_Sub :
+                    {
+                        DecompPtr[p] += PixelLeft[p];
+
+                        break;
+                    }
+
+                    case PNG_FilterType_Up :
+                    {
+                        DecompPtr[p] += PixelUp[p];
+
+                        break;
+                    }
+
+                    case PNG_FilterType_Average :
+                    {
+                        DecompPtr[p] += ((uint8_t) ((((uint16_t) PixelLeft[p]) + ((uint16_t) PixelUp[p])) / 2));
+
+                        break;
+                    }
+
+                    case PNG_FilterType_Paeth :
+                    {
+                        DecompPtr[p] += PredictPaeth(PixelLeft[p], PixelUp[p], PixelUpLeft[p]);
+
+                        break;
+                    }
+
+                    default :
+                    {
+                        return(qfalse);
+                    }
+                }
+            }
+
+            /*
+             *  Move on to the next pixel. The pixel just done becomes the
+             *  left neighbour; up and upleft neighbours only exist on the
+             *  second line and above.
+             */
+
+            PixelLeft  = DecompPtr;
+            DecompPtr += BytesPerPixel;
+
+            if(h > 0)
+            {
+                PixelUpLeft = PixelLeft - (BytesPerScanline + 1);
+                PixelUp     = DecompPtr - (BytesPerScanline + 1);
+            }
+        }
+    }
+
+    return(qtrue);
+}
+
+/*
+ *  Convert a raw input pixel to Quake 3 RGBA format.
+ *  Reads one unfiltered pixel at DecompPtr, writes four bytes to OutPtr.
+ *  Returns qfalse for NULL arguments or unsupported formats.
+ */
+
+static qboolean ConvertPixel(struct PNG_Chunk_IHDR *IHDR,
+                             byte                  *OutPtr,
+                             uint8_t               *DecompPtr,
+                             qboolean               HasTransparentColour,
+                             uint8_t               *TransparentColour,
+                             uint8_t               *OutPal)
+{
+    /*
+     *  input verification
+     */
+
+    if(!(IHDR && OutPtr && DecompPtr && TransparentColour && OutPal))
+    {
+        return(qfalse);
+    }
+
+    switch(IHDR->ColourType)
+    {
+        case PNG_ColourType_Grey :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_1 :
+                case PNG_BitDepth_2 :
+                case PNG_BitDepth_4 :
+                {
+                    uint8_t Step;
+                    uint8_t GreyValue;
+
+                    Step      = 0xFF / ((1 << IHDR->BitDepth) - 1);
+                    GreyValue = DecompPtr[0] * Step;
+
+                    OutPtr[0] = GreyValue;
+                    OutPtr[1] = GreyValue;
+                    OutPtr[2] = GreyValue;
+                    OutPtr[3] = 0xFF;
+
+                    /*
+                     *  Grey supports full transparency for one specified colour
+                     */
+
+                    if(HasTransparentColour)
+                    {
+                        if(TransparentColour[1] == DecompPtr[0])
+                        {
+                            OutPtr[3] = 0x00;
+                        }
+                    }
+
+                    break;
+                }
+
+                case PNG_BitDepth_8 :
+                case PNG_BitDepth_16 :
+                {
+                    OutPtr[0] = DecompPtr[0];
+                    OutPtr[1] = DecompPtr[0];
+                    OutPtr[2] = DecompPtr[0];
+                    OutPtr[3] = 0xFF;
+
+                    /*
+                     *  Grey supports full transparency for one specified colour
+                     */
+
+                    if(HasTransparentColour)
+                    {
+                        if(IHDR->BitDepth == PNG_BitDepth_8)
+                        {
+                            if(TransparentColour[1] == DecompPtr[0])
+                            {
+                                OutPtr[3] = 0x00;
+                            }
+                        }
+                        else
+                        {
+                            if((TransparentColour[0] == DecompPtr[0]) && (TransparentColour[1] == DecompPtr[1]))
+                            {
+                                OutPtr[3] = 0x00;
+                            }
+                        }
+                    }
+
+                    break;
+                }
+
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+
+            break;
+        }
+
+        case PNG_ColourType_True :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_8 :
+                {
+                    OutPtr[0] = DecompPtr[0];
+                    OutPtr[1] = DecompPtr[1];
+                    OutPtr[2] = DecompPtr[2];
+                    OutPtr[3] = 0xFF;
+
+                    /*
+                     *  True supports full transparency for one specified colour (R, G, B are bytes 0..2)
+                     */
+
+                    if(HasTransparentColour)
+                    {
+                        if((TransparentColour[1] == DecompPtr[0]) &&
+                           (TransparentColour[3] == DecompPtr[1]) &&
+                           (TransparentColour[5] == DecompPtr[2]))
+                        {
+                            OutPtr[3] = 0x00;
+                        }
+                    }
+
+                    break;
+                }
+
+                case PNG_BitDepth_16 :
+                {
+                    /*
+                     *  We use only the upper byte.
+                     */
+
+                    OutPtr[0] = DecompPtr[0];
+                    OutPtr[1] = DecompPtr[2];
+                    OutPtr[2] = DecompPtr[4];
+                    OutPtr[3] = 0xFF;
+
+                    /*
+                     *  True supports full transparency for one specified colour
+                     */
+
+                    if(HasTransparentColour)
+                    {
+                        if((TransparentColour[0] == DecompPtr[0]) && (TransparentColour[1] == DecompPtr[1]) &&
+                           (TransparentColour[2] == DecompPtr[2]) && (TransparentColour[3] == DecompPtr[3]) &&
+                           (TransparentColour[4] == DecompPtr[4]) && (TransparentColour[5] == DecompPtr[5]))
+                        {
+                            OutPtr[3] = 0x00;
+                        }
+                    }
+
+                    break;
+                }
+
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+
+            break;
+        }
+
+        case PNG_ColourType_Indexed :
+        {
+            OutPtr[0] = OutPal[DecompPtr[0] * Q3IMAGE_BYTESPERPIXEL + 0];
+            OutPtr[1] = OutPal[DecompPtr[0] * Q3IMAGE_BYTESPERPIXEL + 1];
+            OutPtr[2] = OutPal[DecompPtr[0] * Q3IMAGE_BYTESPERPIXEL + 2];
+            OutPtr[3] = OutPal[DecompPtr[0] * Q3IMAGE_BYTESPERPIXEL + 3];
+
+            break;
+        }
+
+        case PNG_ColourType_GreyAlpha :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_8 :
+                {
+                    OutPtr[0] = DecompPtr[0];
+                    OutPtr[1] = DecompPtr[0];
+                    OutPtr[2] = DecompPtr[0];
+                    OutPtr[3] = DecompPtr[1];
+
+                    break;
+                }
+
+                case PNG_BitDepth_16 :
+                {
+                    /*
+                     *  We use only the upper byte.
+                     */
+
+                    OutPtr[0] = DecompPtr[0];
+                    OutPtr[1] = DecompPtr[0];
+                    OutPtr[2] = DecompPtr[0];
+                    OutPtr[3] = DecompPtr[2];
+
+                    break;
+                }
+
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+
+            break;
+        }
+
+        case PNG_ColourType_TrueAlpha :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_8 :
+                {
+                    OutPtr[0] = DecompPtr[0];
+                    OutPtr[1] = DecompPtr[1];
+                    OutPtr[2] = DecompPtr[2];
+                    OutPtr[3] = DecompPtr[3];
+
+                    break;
+                }
+
+                case PNG_BitDepth_16 :
+                {
+                    /*
+                     *  We use only the upper byte.
+                     */
+
+                    OutPtr[0] = DecompPtr[0];
+                    OutPtr[1] = DecompPtr[2];
+                    OutPtr[2] = DecompPtr[4];
+                    OutPtr[3] = DecompPtr[6];
+
+                    break;
+                }
+
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+
+            break;
+        }
+
+        default :
+        {
+            return(qfalse);
+        }
+    }
+
+    return(qtrue);
+}
+
+
+/*
+ *  Decode a non-interlaced image: unfilter DecompressedData in place, then
+ *  convert it pixel by pixel into OutBuffer. Returns qfalse on bad input.
+ */
+
+static qboolean DecodeImageNonInterlaced(struct PNG_Chunk_IHDR *IHDR,
+                                         byte                  *OutBuffer, 
+                                         uint8_t               *DecompressedData,
+                                         uint32_t               DecompressedDataLength,
+                                         qboolean               HasTransparentColour,
+                                         uint8_t               *TransparentColour,
+                                         uint8_t               *OutPal)
+{
+    uint32_t IHDR_Width;
+    uint32_t IHDR_Height;
+    uint32_t BytesPerScanline, BytesPerPixel, PixelsPerByte;
+    uint32_t  w, h, p;
+    byte *OutPtr;
+    uint8_t *DecompPtr;
+
+    /*
+     *  input verification
+     */
+
+    if(!(IHDR && OutBuffer && DecompressedData && DecompressedDataLength && TransparentColour && OutPal))
+    {
+	return(qfalse);
+    }
+
+    /*
+     *  byte swapping (the IHDR fields go through BigLong before use)
+     */
+     
+    IHDR_Width  = BigLong(IHDR->Width);
+    IHDR_Height = BigLong(IHDR->Height);
+
+    /*
+     *  information for un-filtering: BytesPerPixel and PixelsPerByte per ColourType / BitDepth
+     */
+
+    switch(IHDR->ColourType)
+    {
+        case PNG_ColourType_Grey :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_1 :
+                case PNG_BitDepth_2 :
+                case PNG_BitDepth_4 :
+                {
+                    BytesPerPixel    = 1;
+                    PixelsPerByte    = 8 / IHDR->BitDepth;
+
+                    break;
+                }
+
+                case PNG_BitDepth_8  :
+                case PNG_BitDepth_16 :
+                {
+                    BytesPerPixel    = (IHDR->BitDepth / 8) * PNG_NumColourComponents_Grey;
+                    PixelsPerByte    = 1;
+
+                    break;
+                }
+
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+  
+            break;
+        }
+  
+        case PNG_ColourType_True :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_8  :
+                case PNG_BitDepth_16 :
+                {
+                    BytesPerPixel    = (IHDR->BitDepth / 8) * PNG_NumColourComponents_True;
+                    PixelsPerByte    = 1;
+
+                    break;
+                }
+     
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+  
+            break;
+        }
+
+        case PNG_ColourType_Indexed :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_1 :
+                case PNG_BitDepth_2 :
+                case PNG_BitDepth_4 :
+                {
+                    BytesPerPixel    = 1;
+                    PixelsPerByte    = 8 / IHDR->BitDepth;
+
+                    break;
+                }
+
+                case PNG_BitDepth_8 :
+                {
+                    BytesPerPixel    = PNG_NumColourComponents_Indexed;
+                    PixelsPerByte    = 1;
+
+                    break;
+                }
+         
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+  
+            break;
+        }
+
+        case PNG_ColourType_GreyAlpha :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_8 :
+                case PNG_BitDepth_16 :
+                {
+                    BytesPerPixel    = (IHDR->BitDepth / 8) * PNG_NumColourComponents_GreyAlpha;
+                    PixelsPerByte    = 1;
+
+                    break;
+                }
+     
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+  
+            break;
+        }
+
+        case PNG_ColourType_TrueAlpha :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_8 :
+                case PNG_BitDepth_16 :
+                {
+                    BytesPerPixel    = (IHDR->BitDepth / 8) * PNG_NumColourComponents_TrueAlpha;
+                    PixelsPerByte    = 1;
+
+                    break;
+                }
+    
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+
+            break;
+        }
+
+        default :
+        {
+            return(qfalse);
+        }
+    }
+
+    /*
+     *  Calculate the size of one scanline (rounded up to whole bytes for packed pixels).
+     *  NOTE(review): 32-bit arithmetic; presumably Width/Height are bounded by the caller -- confirm.
+     */
+
+    BytesPerScanline = (IHDR_Width * BytesPerPixel + (PixelsPerByte - 1)) / PixelsPerByte;
+
+    /*
+     *  The data must be exactly Height scanlines, each with one FilterType byte in front.
+     */
+
+    if(!(DecompressedDataLength == ((BytesPerScanline + 1) * IHDR_Height)))
+    {
+        return(qfalse);
+    }
+
+    /*
+     *  Unfilter the image.
+     */
+
+    if(!UnfilterImage(DecompressedData, IHDR_Height, BytesPerScanline, BytesPerPixel))
+    {
+        return(qfalse);
+    }
+
+    /*
+     *  Set the working pointers to the beginning of the buffers.
+     */
+
+    OutPtr = OutBuffer;
+    DecompPtr = DecompressedData;
+
+    /*
+     *  Create the output image.
+     */
+
+    for(h = 0; h < IHDR_Height; h++)
+    {
+        /*
+         *  Counts the pixels emitted on this scanline, so padding bits in the last packed byte are ignored
+         */
+
+        uint32_t CurrPixel;
+  
+        /*
+         *  skip FilterType
+         */
+
+        DecompPtr++;
+
+        /*
+         *  Reset the pixel count.
+         */
+
+        CurrPixel = 0;
+
+        for(w = 0; w < (BytesPerScanline / BytesPerPixel); w++)
+        {
+	    if(PixelsPerByte > 1)
+	    {
+                uint8_t  Mask;
+                uint32_t Shift;
+		uint8_t  SinglePixel;
+
+                for(p = 0; p < PixelsPerByte; p++)
+                {
+                    if(CurrPixel < IHDR_Width)
+                    {
+                        Mask  = (1 << IHDR->BitDepth) - 1;
+                        Shift = (PixelsPerByte - 1 - p) * IHDR->BitDepth; /* p == 0 selects the highest bits */
+
+                        SinglePixel = ((DecompPtr[0] & (Mask << Shift)) >> Shift);
+
+			if(!ConvertPixel(IHDR, OutPtr, &SinglePixel, HasTransparentColour, TransparentColour, OutPal))
+			{
+			    return(qfalse);
+			}
+
+                        OutPtr += Q3IMAGE_BYTESPERPIXEL;
+                        CurrPixel++;
+                    }
+                }
+	    }
+	    else
+	    {
+		if(!ConvertPixel(IHDR, OutPtr, DecompPtr, HasTransparentColour, TransparentColour, OutPal))
+		{
+		    return(qfalse);
+		}
+  
+                OutPtr += Q3IMAGE_BYTESPERPIXEL;
+	    }
+
+            DecompPtr += BytesPerPixel;
+        }
+    }
+
+    return(qtrue);
+}
+
+/*
+ *  Decode an interlaced image.
+ */
+
+static qboolean DecodeImageInterlaced(struct PNG_Chunk_IHDR *IHDR,
+                                      byte                  *OutBuffer, 
+                                      uint8_t               *DecompressedData,
+                                      uint32_t               DecompressedDataLength,
+                                      qboolean               HasTransparentColour,
+                                      uint8_t               *TransparentColour,
+                                      uint8_t               *OutPal)
+{
+    uint32_t IHDR_Width;
+    uint32_t IHDR_Height;
+    uint32_t BytesPerScanline[PNG_Adam7_NumPasses], BytesPerPixel, PixelsPerByte;
+    uint32_t PassWidth[PNG_Adam7_NumPasses], PassHeight[PNG_Adam7_NumPasses];
+    uint32_t WSkip[PNG_Adam7_NumPasses], WOffset[PNG_Adam7_NumPasses], HSkip[PNG_Adam7_NumPasses], HOffset[PNG_Adam7_NumPasses];
+    uint32_t w, h, p, a;
+    byte *OutPtr;
+    uint8_t *DecompPtr;
+    uint32_t TargetLength;
+
+    /*
+     *  input verification
+     */
+
+    if(!(IHDR && OutBuffer && DecompressedData && DecompressedDataLength && TransparentColour && OutPal))
+    {
+	return(qfalse);
+    }
+
+    /*
+     *  byte swapping
+     */
+
+    IHDR_Width  = BigLong(IHDR->Width);
+    IHDR_Height = BigLong(IHDR->Height);
+
+    /*
+     *  Skip and Offset for the passes.
+     */
+
+    WSkip[0]   = 8;
+    WOffset[0] = 0;
+    HSkip[0]   = 8;
+    HOffset[0] = 0;
+
+    WSkip[1]   = 8;
+    WOffset[1] = 4;
+    HSkip[1]   = 8;
+    HOffset[1] = 0;
+
+    WSkip[2]   = 4;
+    WOffset[2] = 0;
+    HSkip[2]   = 8;
+    HOffset[2] = 4;
+
+    WSkip[3]   = 4;
+    WOffset[3] = 2;
+    HSkip[3]   = 4;
+    HOffset[3] = 0;
+
+    WSkip[4]   = 2;
+    WOffset[4] = 0;
+    HSkip[4]   = 4;
+    HOffset[4] = 2;
+
+    WSkip[5]   = 2;
+    WOffset[5] = 1;
+    HSkip[5]   = 2;
+    HOffset[5] = 0;
+
+    WSkip[6]   = 1;
+    WOffset[6] = 0;
+    HSkip[6]   = 2;
+    HOffset[6] = 1;
+
+    /*
+     *  Calculate the sizes of the passes.
+     */
+
+    PassWidth[0]  = (IHDR_Width  + 7) / 8;
+    PassHeight[0] = (IHDR_Height + 7) / 8;
+
+    PassWidth[1]  = (IHDR_Width  + 3) / 8;
+    PassHeight[1] = (IHDR_Height + 7) / 8;
+
+    PassWidth[2]  = (IHDR_Width  + 3) / 4;
+    PassHeight[2] = (IHDR_Height + 3) / 8;
+
+    PassWidth[3]  = (IHDR_Width  + 1) / 4;
+    PassHeight[3] = (IHDR_Height + 3) / 4;
+
+    PassWidth[4]  = (IHDR_Width  + 1) / 2;
+    PassHeight[4] = (IHDR_Height + 1) / 4;
+
+    PassWidth[5]  = (IHDR_Width  + 0) / 2;
+    PassHeight[5] = (IHDR_Height + 1) / 2;
+
+    PassWidth[6]  = (IHDR_Width  + 0) / 1;
+    PassHeight[6] = (IHDR_Height + 0) / 2;
+
+    /*
+     *  information for un-filtering
+     */
+
+    switch(IHDR->ColourType)
+    {
+        case PNG_ColourType_Grey :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_1 :
+                case PNG_BitDepth_2 :
+                case PNG_BitDepth_4 :
+                {
+                    BytesPerPixel    = 1;
+                    PixelsPerByte    = 8 / IHDR->BitDepth;
+
+                    break;
+                }
+
+                case PNG_BitDepth_8  :
+                case PNG_BitDepth_16 :
+                {
+                    BytesPerPixel    = (IHDR->BitDepth / 8) * PNG_NumColourComponents_Grey;
+                    PixelsPerByte    = 1;
+
+                    break;
+                }
+
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+  
+            break;
+        }
+  
+        case PNG_ColourType_True :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_8  :
+                case PNG_BitDepth_16 :
+                {
+                    BytesPerPixel    = (IHDR->BitDepth / 8) * PNG_NumColourComponents_True;
+                    PixelsPerByte    = 1;
+
+                    break;
+                }
+     
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+  
+            break;
+        }
+
+        case PNG_ColourType_Indexed :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_1 :
+                case PNG_BitDepth_2 :
+                case PNG_BitDepth_4 :
+                {
+                    BytesPerPixel    = 1;
+                    PixelsPerByte    = 8 / IHDR->BitDepth;
+
+                    break;
+                }
+
+                case PNG_BitDepth_8 :
+                {
+                    BytesPerPixel    = PNG_NumColourComponents_Indexed;
+                    PixelsPerByte    = 1;
+
+                    break;
+                }
+         
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+  
+            break;
+        }
+
+        case PNG_ColourType_GreyAlpha :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_8 :
+                case PNG_BitDepth_16 :
+                {
+                    BytesPerPixel    = (IHDR->BitDepth / 8) * PNG_NumColourComponents_GreyAlpha;
+                    PixelsPerByte    = 1;
+
+                    break;
+                }
+     
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+  
+            break;
+        }
+
+        case PNG_ColourType_TrueAlpha :
+        {
+            switch(IHDR->BitDepth)
+            {
+                case PNG_BitDepth_8 :
+                case PNG_BitDepth_16 :
+                {
+                    BytesPerPixel    = (IHDR->BitDepth / 8) * PNG_NumColourComponents_TrueAlpha;
+                    PixelsPerByte    = 1;
+
+                    break;
+                }
+    
+                default :
+                {
+                    return(qfalse);
+                }
+            }
+
+            break;
+        }
+
+        default :
+        {
+            return(qfalse);
+        }
+    }
+
+    /*
+     *  Calculate the size of the scanlines per pass
+     */
+
+    for(a = 0; a < PNG_Adam7_NumPasses; a++)
+    {
+	BytesPerScanline[a] = (PassWidth[a] * BytesPerPixel + (PixelsPerByte - 1)) / PixelsPerByte;
+    }
+
+    /*
+     *  Calculate the size of all passes
+     */
+
+    TargetLength = 0;
+
+    for(a = 0; a < PNG_Adam7_NumPasses; a++)
+    {
+	TargetLength += ((BytesPerScanline[a] + (BytesPerScanline[a] ? 1 : 0)) * PassHeight[a]);
+    }
+
+    /*
+     *  Check if we have enough data for the whole image.
+     */
+
+    if(!(DecompressedDataLength == TargetLength))
+    {
+        return(qfalse);
+    }
+
+    /*
+     *  Unfilter the image.
+     */
+
+    DecompPtr = DecompressedData;
+
+    for(a = 0; a < PNG_Adam7_NumPasses; a++)
+    {
+        if(!UnfilterImage(DecompPtr, PassHeight[a], BytesPerScanline[a], BytesPerPixel))
+        {
+            return(qfalse);
+        }
+	
+	DecompPtr += ((BytesPerScanline[a] + (BytesPerScanline[a] ? 1 : 0)) * PassHeight[a]);
+    }
+
+    /*
+     *  Set the working pointers to the beginning of the buffers.
+     */
+
+    DecompPtr = DecompressedData;
+
+    /*
+     *  Create the output image.
+     */
+
+    for(a = 0; a < PNG_Adam7_NumPasses; a++)
+    {
+        for(h = 0; h < PassHeight[a]; h++)
+        {
+            /*
+             *  Count the pixels on the scanline for those multipixel bytes
+             */
+
+            uint32_t CurrPixel;
+
+            /*
+             *  skip FilterType
+             */
+
+            DecompPtr++;
+
+            /*
+             *  Reset the pixel count.
+             */
+
+            CurrPixel = 0;
+
+            for(w = 0; w < (BytesPerScanline[a] / BytesPerPixel); w++)
+            {
+        	if(PixelsPerByte > 1)
+	        {
+                    uint8_t  Mask;
+                    uint32_t Shift;
+		    uint8_t  SinglePixel;
+
+                    for(p = 0; p < PixelsPerByte; p++)
+                    {
+                        if(CurrPixel < PassWidth[a])
+                        {
+                            Mask  = (1 << IHDR->BitDepth) - 1;
+                            Shift = (PixelsPerByte - 1 - p) * IHDR->BitDepth;
+
+                            SinglePixel = ((DecompPtr[0] & (Mask << Shift)) >> Shift);
+
+    			    OutPtr = OutBuffer + (((((h * HSkip[a]) + HOffset[a]) * IHDR_Width) + ((CurrPixel * WSkip[a]) + WOffset[a])) * Q3IMAGE_BYTESPERPIXEL);
+
+    			    if(!ConvertPixel(IHDR, OutPtr, &SinglePixel, HasTransparentColour, TransparentColour, OutPal))
+			    {
+			        return(qfalse);
+			    }
+
+                            CurrPixel++;
+                        }
+                    }
+	    
+	        }
+    	        else
+	        {
+	    	    OutPtr = OutBuffer + (((((h * HSkip[a]) + HOffset[a]) * IHDR_Width) + ((w * WSkip[a]) + WOffset[a])) * Q3IMAGE_BYTESPERPIXEL);
+
+		    if(!ConvertPixel(IHDR, OutPtr, DecompPtr, HasTransparentColour, TransparentColour, OutPal))
+		    {
+		        return(qfalse);
+		    }
+	        }
+
+                DecompPtr += BytesPerPixel;
+            }
+        }
+    }
+
+    return(qtrue);
+}
+
+/*
+ *  The PNG loader: decodes the file `name` into an RGBA buffer obtained from ri.Malloc and stored in *pic.
+ *  *pic stays NULL on any failure; width and height are optional outputs and may be NULL.
+ */
+static void LoadPNG(const char *name, byte **pic, int *width, int *height)
+{
+    struct BufferedFile *ThePNG;
+    byte *OutBuffer;
+    uint8_t *Signature;
+    struct PNG_ChunkHeader *CH;
+    uint32_t ChunkHeaderLength;
+    uint32_t ChunkHeaderType;
+    struct PNG_Chunk_IHDR *IHDR;
+    uint32_t IHDR_Width;
+    uint32_t IHDR_Height;
+    PNG_ChunkCRC *CRC;
+    uint8_t *InPal;
+    uint8_t *DecompressedData;
+    uint32_t DecompressedDataLength;
+    uint32_t i;
+
+    /*
+     *  palette with 256 RGBA entries
+     */
+
+    uint8_t OutPal[1024];
+
+    /*
+     *  transparent colour from the tRNS chunk
+     */
+
+    qboolean HasTransparentColour = qfalse;
+    uint8_t TransparentColour[6] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
+
+    /*
+     *  input verification
+     */
+
+    if(!(name && pic))
+    {
+        return;
+    }
+
+    /*
+     *  Zero out return values.
+     */
+
+    *pic = NULL;
+
+    if(width)
+    {
+        *width = 0;
+    }
+
+    if(height)
+    {
+        *height = 0;
+    }
+
+    /*
+     *  Read the file.
+     */
+
+    ThePNG = ReadBufferedFile(name);
+    if(!ThePNG)
+    {
+        return;
+    }
+
+    /*
+     *  Read the signature of the file.
+     */
+
+    Signature = BufferedFileRead(ThePNG, PNG_Signature_Size);
+    if(!Signature)
+    {
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  Is it a PNG?
+     */
+
+    if(memcmp(Signature, PNG_Signature, PNG_Signature_Size))
+    {
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  Read the first chunk-header.
+     */
+
+    CH = BufferedFileRead(ThePNG, PNG_ChunkHeader_Size);
+    if(!CH)
+    {
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  PNG multi-byte types are in Big Endian
+     */
+
+    ChunkHeaderLength = BigLong(CH->Length);
+    ChunkHeaderType   = BigLong(CH->Type);
+
+    /*
+     *  Check if the first chunk is an IHDR.
+     */
+
+    if(!((ChunkHeaderType == PNG_ChunkType_IHDR) && (ChunkHeaderLength == PNG_Chunk_IHDR_Size)))
+    {
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  Read the IHDR.
+     */
+
+    IHDR = BufferedFileRead(ThePNG, PNG_Chunk_IHDR_Size);
+    if(!IHDR)
+    {
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  Read the CRC for IHDR
+     */
+
+    CRC = BufferedFileRead(ThePNG, PNG_ChunkCRC_Size);
+    if(!CRC)
+    {
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  Here we could check the CRC if we wanted to.
+     */
+
+    /*
+     *  multi-byte type swapping
+     */
+
+    IHDR_Width  = BigLong(IHDR->Width);
+    IHDR_Height = BigLong(IHDR->Height);
+
+    /*
+     *  Check if Width and Height are valid and that the output buffer size cannot overflow.
+     */
+
+    if(!((IHDR_Width > 0) && (IHDR_Height > 0)) || (IHDR_Width > (0x7FFFFFFF / Q3IMAGE_BYTESPERPIXEL / IHDR_Height)))
+    {
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  Do we need to check if the dimensions of the image are valid for Quake3?
+     */
+
+    /*
+     *  Check if CompressionMethod and FilterMethod are valid.
+     */
+
+    if(!((IHDR->CompressionMethod == PNG_CompressionMethod_0) && (IHDR->FilterMethod == PNG_FilterMethod_0)))
+    {
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  Check if InterlaceMethod is valid.
+     */
+
+    if(!((IHDR->InterlaceMethod == PNG_InterlaceMethod_NonInterlaced)  || (IHDR->InterlaceMethod == PNG_InterlaceMethod_Interlaced)))
+    {
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  Read palette for an indexed image.
+     */
+
+    if(IHDR->ColourType == PNG_ColourType_Indexed)
+    {
+        /*
+         *  We need the palette first.
+         */
+
+        if(!FindChunk(ThePNG, PNG_ChunkType_PLTE))
+        {
+            CloseBufferedFile(ThePNG);
+
+            return;
+        }
+
+        /*
+         *  Read the chunk-header.
+         */
+
+        CH = BufferedFileRead(ThePNG, PNG_ChunkHeader_Size);
+        if(!CH)
+        {
+            CloseBufferedFile(ThePNG);
+
+            return;
+        }
+
+        /*
+         *  PNG multi-byte types are in Big Endian
+         */
+
+        ChunkHeaderLength = BigLong(CH->Length);
+        ChunkHeaderType   = BigLong(CH->Type);
+
+        /*
+         *  Check if the chunk is an PLTE.
+         */
+
+        if(!(ChunkHeaderType == PNG_ChunkType_PLTE))
+        {
+            CloseBufferedFile(ThePNG);
+
+            return;
+        }
+
+        /*
+         *  Check if Length is divisible by 3 and fits the 256 entries of OutPal
+         */
+
+        if((ChunkHeaderLength % 3) || (ChunkHeaderLength > (256 * 3)))
+        {
+            CloseBufferedFile(ThePNG);
+
+            return;
+        }
+
+        /*
+         *  Read the raw palette data
+         */
+
+        InPal = BufferedFileRead(ThePNG, ChunkHeaderLength);
+        if(!InPal)
+        {
+            CloseBufferedFile(ThePNG);
+
+            return;
+        }
+
+        /*
+         *  Read the CRC for the palette
+         */
+
+        CRC = BufferedFileRead(ThePNG, PNG_ChunkCRC_Size);
+        if(!CRC)
+        {
+            CloseBufferedFile(ThePNG);
+
+            return;
+        }
+
+        /*
+         *  Set some default values.
+         */
+
+        for(i = 0; i < 256; i++)
+        {
+            OutPal[i * Q3IMAGE_BYTESPERPIXEL + 0] = 0x00;
+            OutPal[i * Q3IMAGE_BYTESPERPIXEL + 1] = 0x00;
+            OutPal[i * Q3IMAGE_BYTESPERPIXEL + 2] = 0x00;
+            OutPal[i * Q3IMAGE_BYTESPERPIXEL + 3] = 0xFF;
+        }
+
+        /*
+         *  Convert to the Quake3 RGBA-format.
+         */
+
+        for(i = 0; i < (ChunkHeaderLength / 3); i++)
+        {
+            OutPal[i * Q3IMAGE_BYTESPERPIXEL + 0] = InPal[i*3+0];
+            OutPal[i * Q3IMAGE_BYTESPERPIXEL + 1] = InPal[i*3+1];
+            OutPal[i * Q3IMAGE_BYTESPERPIXEL + 2] = InPal[i*3+2];
+            OutPal[i * Q3IMAGE_BYTESPERPIXEL + 3] = 0xFF;
+        }
+    }
+
+    /*
+     *  transparency information is sometimes stored in an tRNS chunk
+     */
+
+    /*
+     *  Let's see if there is a tRNS chunk
+     */
+
+    if(FindChunk(ThePNG, PNG_ChunkType_tRNS))
+    {
+        uint8_t *Trans;
+
+        /*
+         *  Read the chunk-header.
+         */
+
+        CH = BufferedFileRead(ThePNG, PNG_ChunkHeader_Size);
+        if(!CH)
+        {
+            CloseBufferedFile(ThePNG);
+
+            return;
+        }
+
+        /*
+         *  PNG multi-byte types are in Big Endian
+         */
+
+        ChunkHeaderLength = BigLong(CH->Length);
+        ChunkHeaderType   = BigLong(CH->Type);
+
+        /*
+         *  Check if the chunk is an tRNS.
+         */
+
+        if(!(ChunkHeaderType == PNG_ChunkType_tRNS))
+        {
+            CloseBufferedFile(ThePNG);
+
+            return;
+        }
+
+        /*
+         *  Read the transparency information.
+         */
+
+        Trans = BufferedFileRead(ThePNG, ChunkHeaderLength);
+        if(!Trans)
+        {
+            CloseBufferedFile(ThePNG);
+
+            return;
+        }
+
+        /*
+         *  Read the CRC.
+         */
+
+        CRC = BufferedFileRead(ThePNG, PNG_ChunkCRC_Size);
+        if(!CRC)
+        {
+            CloseBufferedFile(ThePNG);
+
+            return;
+        }
+
+        /*
+         *  Only for Grey, True and Indexed ColourType should tRNS exist.
+         */
+
+        switch(IHDR->ColourType)
+        {
+            case PNG_ColourType_Grey :
+            {
+                if(ChunkHeaderLength != 2)
+                {
+                    CloseBufferedFile(ThePNG);
+
+                    return;
+                }
+
+                HasTransparentColour = qtrue;
+
+                /*
+                 *  Grey can have one colour which is completely transparent.
+                 *  This colour is always stored in 16 bits.
+                 */
+
+                TransparentColour[0] = Trans[0];
+                TransparentColour[1] = Trans[1];
+
+                break;
+            }
+
+            case PNG_ColourType_True :
+            {
+                if(ChunkHeaderLength != 6)
+                {
+                    CloseBufferedFile(ThePNG);
+
+                    return;
+                }
+
+                HasTransparentColour = qtrue;
+
+                /*
+                 *  True can have one colour which is completely transparent.
+                 *  This colour is always stored in 16 bits.
+                 */
+
+                TransparentColour[0] = Trans[0];
+                TransparentColour[1] = Trans[1];
+                TransparentColour[2] = Trans[2];
+                TransparentColour[3] = Trans[3];
+                TransparentColour[4] = Trans[4];
+                TransparentColour[5] = Trans[5];
+
+                break;
+            }
+
+            case PNG_ColourType_Indexed :
+            {
+                /*
+                 *  Maximum of 256 one byte transparency entries.
+                 */
+
+                if(ChunkHeaderLength > 256)
+                {
+                    CloseBufferedFile(ThePNG);
+
+                    return;
+                }
+
+                HasTransparentColour = qtrue;
+
+                /*
+                 *  alpha values for palette entries
+                 */
+
+                for(i = 0; i < ChunkHeaderLength; i++)
+                {
+                    OutPal[i * Q3IMAGE_BYTESPERPIXEL + 3] = Trans[i];
+                }
+
+                break;
+            }
+
+            /*
+             *  All other ColourTypes should not have tRNS chunks
+             */
+
+            default :
+            {
+                CloseBufferedFile(ThePNG);
+
+                return;
+            }
+        }
+    }
+
+    /*
+     *  Rewind to the start of the file.
+     */
+
+    if(!BufferedFileRewind(ThePNG, -1))
+    {
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  Skip the signature
+     */
+
+    if(!BufferedFileSkip(ThePNG, PNG_Signature_Size))
+    {
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  Decompress all IDAT chunks
+     */
+
+    DecompressedDataLength = DecompressIDATs(ThePNG, &DecompressedData);
+    if(!(DecompressedDataLength && DecompressedData))
+    {
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  Allocate output buffer.
+     */
+
+    OutBuffer = ri.Malloc(IHDR_Width * IHDR_Height * Q3IMAGE_BYTESPERPIXEL);
+    if(!OutBuffer)
+    {
+        ri.Free(DecompressedData);
+        CloseBufferedFile(ThePNG);
+
+        return;
+    }
+
+    /*
+     *  Interlaced and Non-interlaced images need to be handled differently.
+     */
+
+    switch(IHDR->InterlaceMethod)
+    {
+        case PNG_InterlaceMethod_NonInterlaced :
+        {
+            if(!DecodeImageNonInterlaced(IHDR, OutBuffer, DecompressedData, DecompressedDataLength, HasTransparentColour, TransparentColour, OutPal))
+            {
+                ri.Free(OutBuffer);
+                ri.Free(DecompressedData);
+                CloseBufferedFile(ThePNG);
+
+                return;
+            }
+
+            break;
+        }
+
+        case PNG_InterlaceMethod_Interlaced :
+        {
+            if(!DecodeImageInterlaced(IHDR, OutBuffer, DecompressedData, DecompressedDataLength, HasTransparentColour, TransparentColour, OutPal))
+            {
+                ri.Free(OutBuffer);
+                ri.Free(DecompressedData);
+                CloseBufferedFile(ThePNG);
+
+                return;
+            }
+
+            break;
+        }
+
+        default :
+        {
+            ri.Free(OutBuffer);
+            ri.Free(DecompressedData);
+            CloseBufferedFile(ThePNG);
+
+            return;
+        }
+    }
+
+    /*
+     *  update the pointer to the image data
+     */
+
+    *pic = OutBuffer;
+
+    /*
+     *  Fill width and height.
+     */
+
+    if(width)
+    {
+        *width = IHDR_Width;
+    }
+
+    if(height)
+    {
+        *height = IHDR_Height;
+    }
+
+    /*
+     *  DecompressedData is not needed anymore.
+     */
+
+    ri.Free(DecompressedData);
+
+    /*
+     *  We have all data, so close the file.
+     */
+
+    CloseBufferedFile(ThePNG);
+}
+
+//===================================================================
+
+/*
+=================
 R_LoadImage
 
 Loads any of the supported image types into a cannonical
@@ -1951,23 +4398,41 @@ void R_LoadImage( const char *name, byte **pic, int *width, int *height ) {
 	}
 
 	if ( !Q_stricmp( name+len-4, ".tga" ) ) {
-	  LoadTGA( name, pic, width, height );            // try tga first
-    if (!*pic) {                                    //
-		  char altname[MAX_QPATH];                      // try jpg in place of tga 
-      strcpy( altname, name );                      
-      len = strlen( altname );                  
-      altname[len-3] = 'j';
-      altname[len-2] = 'p';
-      altname[len-1] = 'g';
+		LoadTGA( name, pic, width, height );
+
+		// This is a hack to get around the fact that some
+		// baseq3 shaders refer to tga files where the images
+		// are actually jpgs
+		if (!*pic) {
+			// try jpg in place of tga 
+			char altname[MAX_QPATH];
+
+			strcpy( altname, name );
+			len = strlen( altname );
+			altname[len-3] = 'j';
+			altname[len-2] = 'p';
+			altname[len-1] = 'g';
+
+			ri.Printf( PRINT_DEVELOPER, "WARNING: %s failed, trying %s\n", name, altname );
 			LoadJPG( altname, pic, width, height );
 		}
-  } else if ( !Q_stricmp(name+len-4, ".pcx") ) {
-    LoadPCX32( name, pic, width, height );
-	} else if ( !Q_stricmp( name+len-4, ".bmp" ) ) {
+	}
+	else if ( !Q_stricmp(name+len-4, ".pcx") )
+	{
+		LoadPCX32( name, pic, width, height );
+	}
+	else if ( !Q_stricmp( name+len-4, ".bmp" ) )
+	{
 		LoadBMP( name, pic, width, height );
-	} else if ( !Q_stricmp( name+len-4, ".jpg" ) ) {
+	}
+	else if ( !Q_stricmp( name+len-4, ".jpg" ) )
+	{
 		LoadJPG( name, pic, width, height );
 	}
+	else if ( !Q_stricmp( name+len-4, ".png" ) )
+	{
+		LoadPNG( name, pic, width, height );
+	}
 }
 
 
@@ -2016,19 +4481,8 @@ image_t	*R_FindImageFile( const char *name, qboolean mipmap, qboolean allowPicmi
 	// load the pic from disk
 	//
 	R_LoadImage( name, &pic, &width, &height );
-	if ( pic == NULL ) {                                    // if we dont get a successful load
-	  char altname[MAX_QPATH];                              // copy the name
-    int len;                                              //  
-    strcpy( altname, name );                              //
-    len = strlen( altname );                              // 
-    altname[len-3] = toupper(altname[len-3]);             // and try upper case extension for unix systems
-    altname[len-2] = toupper(altname[len-2]);             //
-    altname[len-1] = toupper(altname[len-1]);             //
-		ri.Printf( PRINT_ALL, "trying %s...\n", altname );    // 
-	  R_LoadImage( altname, &pic, &width, &height );        //
-    if (pic == NULL) {                                    // if that fails
-      return NULL;                                        // bail
-    }
+	if ( pic == NULL ) {
+		return NULL;
 	}
 
 	image = R_CreateImage( ( char * ) name, pic, width, height, mipmap, allowPicmip, glWrapClampMode );
diff --git a/src/renderer/tr_init.c b/src/renderer/tr_init.c
index b9b676a9..cede88a6 100644
--- a/src/renderer/tr_init.c
+++ b/src/renderer/tr_init.c
@@ -1161,7 +1161,7 @@ void R_Init( void ) {
 
 //	Swap_Init();
 
-	if ( (int)tess.xyz & 15 ) {
+	if ( (intptr_t)tess.xyz & 15 ) {
 		Com_Printf( "WARNING: tess.xyz not 16 byte aligned\n" );
 	}
 	Com_Memset( tess.constantColor255, 255, sizeof( tess.constantColor255 ) );
diff --git a/src/renderer/tr_shader.c b/src/renderer/tr_shader.c
index 10223a5c..dd51a974 100644
--- a/src/renderer/tr_shader.c
+++ b/src/renderer/tr_shader.c
@@ -2867,32 +2867,32 @@ static void ScanAndLoadShaderFiles( void )
 	char **shaderFiles;
 	char *buffers[MAX_SHADER_FILES];
 	char *p;
-	int numShaders;
+	int numShaderFiles;
 	int i;
 	char *oldp, *token, *hashMem;
 	int shaderTextHashTableSizes[MAX_SHADERTEXT_HASH], hash, size;
 
 	long sum = 0;
 	// scan for shader files
-	shaderFiles = ri.FS_ListFiles( "scripts", ".shader", &numShaders );
+	shaderFiles = ri.FS_ListFiles( "scripts", ".shader", &numShaderFiles );
 
-	if ( !shaderFiles || !numShaders )
+	if ( !shaderFiles || !numShaderFiles )
 	{
 		ri.Printf( PRINT_WARNING, "WARNING: no shader files found\n" );
 		return;
 	}
 
-	if ( numShaders > MAX_SHADER_FILES ) {
-		numShaders = MAX_SHADER_FILES;
+	if ( numShaderFiles > MAX_SHADER_FILES ) {
+		numShaderFiles = MAX_SHADER_FILES;
 	}
 
 	// load and parse shader files
-	for ( i = 0; i < numShaders; i++ )
+	for ( i = 0; i < numShaderFiles; i++ )
 	{
 		char filename[MAX_QPATH];
 
 		Com_sprintf( filename, sizeof( filename ), "scripts/%s", shaderFiles[i] );
-		ri.Printf( PRINT_ALL, "...loading '%s'\n", filename );
+		ri.Printf( PRINT_DEVELOPER, "...loading '%s'\n", filename );
 		sum += ri.FS_ReadFile( filename, (void **)&buffers[i] );
 		if ( !buffers[i] ) {
 			ri.Error( ERR_DROP, "Couldn't load %s", filename );
@@ -2900,16 +2900,16 @@ static void ScanAndLoadShaderFiles( void )
 	}
 
 	// build single large buffer
-	s_shaderText = ri.Hunk_Alloc( sum + numShaders*2, h_low );
+	s_shaderText = ri.Hunk_Alloc( sum + numShaderFiles*2, h_low );
+	s_shaderText[ 0 ] = '\0';
 
 	// free in reverse order, so the temp files are all dumped
-	for ( i = numShaders - 1; i >= 0 ; i-- ) {
-		strcat( s_shaderText, "\n" );
+	for ( i = numShaderFiles - 1; i >= 0 ; i-- ) {
 		p = &s_shaderText[strlen(s_shaderText)];
 		strcat( s_shaderText, buffers[i] );
 		ri.FS_FreeFile( buffers[i] );
-		buffers[i] = p;
 		COM_Compress(p);
+		strcat( s_shaderText, "\n" );
 	}
 
 	// free up memory
@@ -2917,28 +2917,19 @@ static void ScanAndLoadShaderFiles( void )
 
 	Com_Memset(shaderTextHashTableSizes, 0, sizeof(shaderTextHashTableSizes));
 	size = 0;
-	//
-	for ( i = 0; i < numShaders; i++ ) {
-		// pointer to the first shader file
-		p = buffers[i];
-		// look for label
-		while ( 1 ) {
-			token = COM_ParseExt( &p, qtrue );
-			if ( token[0] == 0 ) {
-				break;
-			}
 
-			hash = generateHashValue(token, MAX_SHADERTEXT_HASH);
-			shaderTextHashTableSizes[hash]++;
-			size++;
-			SkipBracedSection(&p);
-			// if we passed the pointer to the next shader file
-			if ( i < numShaders - 1 ) {
-				if ( p > buffers[i+1] ) {
-					break;
-				}
-			}
+	p = s_shaderText;
+	// look for shader names
+	while ( 1 ) {
+		token = COM_ParseExt( &p, qtrue );
+		if ( token[0] == 0 ) {
+			break;
 		}
+
+		hash = generateHashValue(token, MAX_SHADERTEXT_HASH);
+		shaderTextHashTableSizes[hash]++;
+		size++;
+		SkipBracedSection(&p);
 	}
 
 	size += MAX_SHADERTEXT_HASH;
@@ -2951,29 +2942,20 @@ static void ScanAndLoadShaderFiles( void )
 	}
 
 	Com_Memset(shaderTextHashTableSizes, 0, sizeof(shaderTextHashTableSizes));
-	//
-	for ( i = 0; i < numShaders; i++ ) {
-		// pointer to the first shader file
-		p = buffers[i];
-		// look for label
-		while ( 1 ) {
-			oldp = p;
-			token = COM_ParseExt( &p, qtrue );
-			if ( token[0] == 0 ) {
-				break;
-			}
-
-			hash = generateHashValue(token, MAX_SHADERTEXT_HASH);
-			shaderTextHashTable[hash][shaderTextHashTableSizes[hash]++] = oldp;
 
-			SkipBracedSection(&p);
-			// if we passed the pointer to the next shader file
-			if ( i < numShaders - 1 ) {
-				if ( p > buffers[i+1] ) {
-					break;
-				}
-			}
+	p = s_shaderText;
+	// look for shader names
+	while ( 1 ) {
+		oldp = p;
+		token = COM_ParseExt( &p, qtrue );
+		if ( token[0] == 0 ) {
+			break;
 		}
+
+		hash = generateHashValue(token, MAX_SHADERTEXT_HASH);
+		shaderTextHashTable[hash][shaderTextHashTableSizes[hash]++] = oldp;
+
+		SkipBracedSection(&p);
 	}
 
 	return;
diff --git a/src/unix/linux_signals.c b/src/unix/linux_signals.c
index 72c5a522..d3c44267 100644
--- a/src/unix/linux_signals.c
+++ b/src/unix/linux_signals.c
@@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include "../renderer/tr_local.h"
 #endif
 
-static qboolean signalcaught = qfalse;;
+static qboolean signalcaught = qfalse;
 
 void Sys_Exit(int); // bk010104 - abstraction