diff options
| author | IronClawTrem <louie.nutman@gmail.com> | 2019-08-06 02:17:23 +0100 | 
|---|---|---|
| committer | IronClawTrem <louie.nutman@gmail.com> | 2019-08-06 02:17:23 +0100 | 
| commit | e5ff1d3d4bc01d98b12d05f9cb85457c7f15c424 (patch) | |
| tree | a18bc2026e95b76f820170855ea3a39aa5f03324 /src/tools/lcc/cpp | |
first commit
Diffstat (limited to 'src/tools/lcc/cpp')
| -rw-r--r-- | src/tools/lcc/cpp/cpp.c | 326 | ||||
| -rw-r--r-- | src/tools/lcc/cpp/cpp.h | 166 | ||||
| -rw-r--r-- | src/tools/lcc/cpp/eval.c | 524 | ||||
| -rw-r--r-- | src/tools/lcc/cpp/getopt.c | 53 | ||||
| -rw-r--r-- | src/tools/lcc/cpp/hideset.c | 112 | ||||
| -rw-r--r-- | src/tools/lcc/cpp/include.c | 153 | ||||
| -rw-r--r-- | src/tools/lcc/cpp/lex.c | 580 | ||||
| -rw-r--r-- | src/tools/lcc/cpp/macro.c | 514 | ||||
| -rw-r--r-- | src/tools/lcc/cpp/nlist.c | 104 | ||||
| -rw-r--r-- | src/tools/lcc/cpp/tokens.c | 370 | ||||
| -rw-r--r-- | src/tools/lcc/cpp/unix.c | 134 | 
11 files changed, 3036 insertions, 0 deletions
diff --git a/src/tools/lcc/cpp/cpp.c b/src/tools/lcc/cpp/cpp.c new file mode 100644 index 0000000..5c0cfd7 --- /dev/null +++ b/src/tools/lcc/cpp/cpp.c @@ -0,0 +1,326 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <stdarg.h> +#include "cpp.h" + +char rcsid[] = "cpp.c - faked rcsid"; + +#define	OUTS	16384 +char	outbuf[OUTS]; +char	*outbufp = outbuf; +Source	*cursource; +int	nerrs; +struct	token nltoken = { NL, 0, 0, 0, 1, (uchar*)"\n" }; +char	*curtime; +int	incdepth; +int	ifdepth; +int	ifsatisfied[NIF]; +int	skipping; + + +int +main(int argc, char **argv) +{ +	Tokenrow tr; +	time_t t; +	char ebuf[BUFSIZ]; + +	setbuf(stderr, ebuf); +	t = time(NULL); +	curtime = ctime(&t); +	maketokenrow(3, &tr); +	expandlex(); +	setup(argc, argv); +	fixlex(); +	iniths(); +	genline(); +	process(&tr); +	flushout(); +	fflush(stderr); +	exit(nerrs > 0); +	return 0; +} + +void +process(Tokenrow *trp) +{ +	int anymacros = 0; + +	for (;;) { +		if (trp->tp >= trp->lp) { +			trp->tp = trp->lp = trp->bp; +			outbufp = outbuf; +			anymacros |= gettokens(trp, 1); +			trp->tp = trp->bp; +		} +		if (trp->tp->type == END) { +			if (--incdepth>=0) { +				if (cursource->ifdepth) +					error(ERROR, +					 "Unterminated conditional in #include"); +				unsetsource(); +				cursource->line += cursource->lineinc; +				trp->tp = trp->lp; +				genline(); +				continue; +			} +			if (ifdepth) +				error(ERROR, "Unterminated #if/#ifdef/#ifndef"); +			break; +		} +		if (trp->tp->type==SHARP) { +			trp->tp += 1; +			control(trp); +		} else if (!skipping && anymacros) +			expandrow(trp, NULL); +		if (skipping) +			setempty(trp); +		puttokens(trp); +		anymacros = 0; +		cursource->line += cursource->lineinc; +		if (cursource->lineinc>1) { +			genline(); +		} +	} +} +	 +void +control(Tokenrow *trp) +{ +	Nlist *np; +	Token *tp; + +	tp = trp->tp; +	if (tp->type!=NAME) { +		if (tp->type==NUMBER) +			goto kline; +		if (tp->type != NL) +			error(ERROR, "Unidentifiable 
control line"); +		return;			/* else empty line */ +	} +	if ((np = lookup(tp, 0))==NULL || ((np->flag&ISKW)==0 && !skipping)) { +		error(WARNING, "Unknown preprocessor control %t", tp); +		return; +	} +	if (skipping) { +		switch (np->val) { +		case KENDIF: +			if (--ifdepth<skipping) +				skipping = 0; +			--cursource->ifdepth; +			setempty(trp); +			return; + +		case KIFDEF: +		case KIFNDEF: +		case KIF: +			if (++ifdepth >= NIF) +				error(FATAL, "#if too deeply nested"); +			++cursource->ifdepth; +			return; + +		case KELIF: +		case KELSE: +			if (ifdepth<=skipping) +				break; +			return; + +		default: +			return; +		} +	} +	switch (np->val) { +	case KDEFINE: +		dodefine(trp); +		break; + +	case KUNDEF: +		tp += 1; +		if (tp->type!=NAME || trp->lp - trp->bp != 4) { +			error(ERROR, "Syntax error in #undef"); +			break; +		} +		if ((np = lookup(tp, 0)) != NULL) +			np->flag &= ~ISDEFINED; +		break; + +	case KPRAGMA: +		return; + +	case KIFDEF: +	case KIFNDEF: +	case KIF: +		if (++ifdepth >= NIF) +			error(FATAL, "#if too deeply nested"); +		++cursource->ifdepth; +		ifsatisfied[ifdepth] = 0; +		if (eval(trp, np->val)) +			ifsatisfied[ifdepth] = 1; +		else +			skipping = ifdepth; +		break; + +	case KELIF: +		if (ifdepth==0) { +			error(ERROR, "#elif with no #if"); +			return; +		} +		if (ifsatisfied[ifdepth]==2) +			error(ERROR, "#elif after #else"); +		if (eval(trp, np->val)) { +			if (ifsatisfied[ifdepth]) +				skipping = ifdepth; +			else { +				skipping = 0; +				ifsatisfied[ifdepth] = 1; +			} +		} else +			skipping = ifdepth; +		break; + +	case KELSE: +		if (ifdepth==0 || cursource->ifdepth==0) { +			error(ERROR, "#else with no #if"); +			return; +		} +		if (ifsatisfied[ifdepth]==2) +			error(ERROR, "#else after #else"); +		if (trp->lp - trp->bp != 3) +			error(ERROR, "Syntax error in #else"); +		skipping = ifsatisfied[ifdepth]? 
ifdepth: 0; +		ifsatisfied[ifdepth] = 2; +		break; + +	case KENDIF: +		if (ifdepth==0 || cursource->ifdepth==0) { +			error(ERROR, "#endif with no #if"); +			return; +		} +		--ifdepth; +		--cursource->ifdepth; +		if (trp->lp - trp->bp != 3) +			error(WARNING, "Syntax error in #endif"); +		break; + +	case KWARNING: +		trp->tp = tp+1; +		error(WARNING, "#warning directive: %r", trp); +		break; + +	case KERROR: +		trp->tp = tp+1; +		error(ERROR, "#error directive: %r", trp); +		break; + +	case KLINE: +		trp->tp = tp+1; +		expandrow(trp, "<line>"); +		tp = trp->bp+2; +	kline: +		if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp +		 || ((tp+3==trp->lp && ((tp+1)->type!=STRING))||*(tp+1)->t=='L')){ +			error(ERROR, "Syntax error in #line"); +			return; +		} +		cursource->line = atol((char*)tp->t)-1; +		if (cursource->line<0 || cursource->line>=32768) +			error(WARNING, "#line specifies number out of range"); +		tp = tp+1; +		if (tp+1<trp->lp) +			cursource->filename=(char*)newstring(tp->t+1,tp->len-2,0); +		return; + +	case KDEFINED: +		error(ERROR, "Bad syntax for control line"); +		break; + +	case KINCLUDE: +		doinclude(trp); +		trp->lp = trp->bp; +		return; + +	case KEVAL: +		eval(trp, np->val); +		break; + +	default: +		error(ERROR, "Preprocessor control `%t' not yet implemented", tp); +		break; +	} +	setempty(trp); +} + +void * +domalloc(int size) +{ +	void *p = malloc(size); + +	if (p==NULL) +		error(FATAL, "Out of memory from malloc"); +	return p; +} + +void +dofree(void *p) +{ +	free(p); +} + +void +error(enum errtype type, char *string, ...) 
+{ +	va_list ap; +	char *cp, *ep; +	Token *tp; +	Tokenrow *trp; +	Source *s; +	int i; + +	fprintf(stderr, "cpp: "); +	for (s=cursource; s; s=s->next) +		if (*s->filename) +			fprintf(stderr, "%s:%d ", s->filename, s->line); +	va_start(ap, string); +	for (ep=string; *ep; ep++) { +		if (*ep=='%') { +			switch (*++ep) { + +			case 's': +				cp = va_arg(ap, char *); +				fprintf(stderr, "%s", cp); +				break; +			case 'd': +				i = va_arg(ap, int); +				fprintf(stderr, "%d", i); +				break; +			case 't': +				tp = va_arg(ap, Token *); +				fprintf(stderr, "%.*s", tp->len, tp->t); +				break; + +			case 'r': +				trp = va_arg(ap, Tokenrow *); +				for (tp=trp->tp; tp<trp->lp&&tp->type!=NL; tp++) { +					if (tp>trp->tp && tp->wslen) +						fputc(' ', stderr); +					fprintf(stderr, "%.*s", tp->len, tp->t); +				} +				break; + +			default: +				fputc(*ep, stderr); +				break; +			} +		} else +			fputc(*ep, stderr); +	} +	va_end(ap); +	fputc('\n', stderr); +	if (type==FATAL) +		exit(1); +	if (type!=WARNING) +		nerrs = 1; +	fflush(stderr); +} diff --git a/src/tools/lcc/cpp/cpp.h b/src/tools/lcc/cpp/cpp.h new file mode 100644 index 0000000..ddd7502 --- /dev/null +++ b/src/tools/lcc/cpp/cpp.h @@ -0,0 +1,166 @@ +#define	INS	32768		/* input buffer */ +#define	OBS	4096		/* outbut buffer */ +#define	NARG	32		/* Max number arguments to a macro */ +#define	NINCLUDE 32		/* Max number of include directories (-I) */ +#define	NIF	32		/* depth of nesting of #if */ +#ifndef EOF +#define	EOF	(-1) +#endif +#ifndef NULL +#define NULL	0 +#endif + +#ifndef __alpha +typedef unsigned char uchar; +#endif + +enum toktype { END, UNCLASS, NAME, NUMBER, STRING, CCON, NL, WS, DSHARP, +		EQ, NEQ, LEQ, GEQ, LSH, RSH, LAND, LOR, PPLUS, MMINUS, +		ARROW, SBRA, SKET, LP, RP, DOT, AND, STAR, PLUS, MINUS, +		TILDE, NOT, SLASH, PCT, LT, GT, CIRC, OR, QUEST, +		COLON, ASGN, COMMA, SHARP, SEMIC, CBRA, CKET, +		ASPLUS, ASMINUS, ASSTAR, ASSLASH, ASPCT, ASCIRC, ASLSH, +		ASRSH, ASOR, ASAND, ELLIPS, +		DSHARP1, 
NAME1, DEFINED, UMINUS }; + +enum kwtype { KIF, KIFDEF, KIFNDEF, KELIF, KELSE, KENDIF, KINCLUDE, KDEFINE, +		KUNDEF, KLINE, KWARNING, KERROR, KPRAGMA, KDEFINED, +		KLINENO, KFILE, KDATE, KTIME, KSTDC, KEVAL }; + +#define	ISDEFINED	01	/* has #defined value */ +#define	ISKW		02	/* is PP keyword */ +#define	ISUNCHANGE	04	/* can't be #defined in PP */ +#define	ISMAC		010	/* builtin macro, e.g. __LINE__ */ + +#define	EOB	0xFE		/* sentinel for end of input buffer */ +#define	EOFC	0xFD		/* sentinel for end of input file */ +#define	XPWS	1		/* token flag: white space to assure token sep. */ + +typedef struct token { +	unsigned char	type; +	unsigned char 	flag; +	unsigned short	hideset; +	unsigned int	wslen; +	unsigned int	len; +	uchar	*t; +} Token; + +typedef struct tokenrow { +	Token	*tp;		/* current one to scan */ +	Token	*bp;		/* base (allocated value) */ +	Token	*lp;		/* last+1 token used */ +	int	max;		/* number allocated */ +} Tokenrow; + +typedef struct source { +	char	*filename;	/* name of file of the source */ +	int	line;		/* current line number */ +	int	lineinc;	/* adjustment for \\n lines */ +	uchar	*inb;		/* input buffer */ +	uchar	*inp;		/* input pointer */ +	uchar	*inl;		/* end of input */ +	int	fd;		/* input source */ +	int	ifdepth;	/* conditional nesting in include */ +	struct	source *next;	/* stack for #include */ +} Source; + +typedef struct nlist { +	struct nlist *next; +	uchar	*name; +	int	len; +	Tokenrow *vp;		/* value as macro */ +	Tokenrow *ap;		/* list of argument names, if any */ +	char	val;		/* value as preprocessor name */ +	char	flag;		/* is defined, is pp name */ +} Nlist; + +typedef	struct	includelist { +	char	deleted; +	char	always; +	char	*file; +} Includelist; + +#define	new(t)	(t *)domalloc(sizeof(t)) +#define	quicklook(a,b)	(namebit[(a)&077] & (1<<((b)&037))) +#define	quickset(a,b)	namebit[(a)&077] |= (1<<((b)&037)) +extern	unsigned long namebit[077+1]; + +enum errtype { WARNING, ERROR, FATAL }; + +void	expandlex(void); +void	
fixlex(void); +void	setup(int, char **); +int	gettokens(Tokenrow *, int); +int	comparetokens(Tokenrow *, Tokenrow *); +Source	*setsource(char *, int, char *); +void	unsetsource(void); +void	puttokens(Tokenrow *); +void	process(Tokenrow *); +void	*domalloc(int); +void	dofree(void *); +void	error(enum errtype, char *, ...); +void	flushout(void); +int	fillbuf(Source *); +int	trigraph(Source *); +int	foldline(Source *); +Nlist	*lookup(Token *, int); +void	control(Tokenrow *); +void	dodefine(Tokenrow *); +void	doadefine(Tokenrow *, int); +void	doinclude(Tokenrow *); +void	appendDirToIncludeList( char *dir ); +void	doif(Tokenrow *, enum kwtype); +void	expand(Tokenrow *, Nlist *); +void	builtin(Tokenrow *, int); +int	gatherargs(Tokenrow *, Tokenrow **, int *); +void	substargs(Nlist *, Tokenrow *, Tokenrow **); +void	expandrow(Tokenrow *, char *); +void	maketokenrow(int, Tokenrow *); +Tokenrow *copytokenrow(Tokenrow *, Tokenrow *); +Token	*growtokenrow(Tokenrow *); +Tokenrow *normtokenrow(Tokenrow *); +void	adjustrow(Tokenrow *, int); +void	movetokenrow(Tokenrow *, Tokenrow *); +void	insertrow(Tokenrow *, int, Tokenrow *); +void	peektokens(Tokenrow *, char *); +void	doconcat(Tokenrow *); +Tokenrow *stringify(Tokenrow *); +int	lookuparg(Nlist *, Token *); +long	eval(Tokenrow *, int); +void	genline(void); +void	setempty(Tokenrow *); +void	makespace(Tokenrow *); +char	*outnum(char *, int); +int	digit(int); +uchar	*newstring(uchar *, int, int); +int	checkhideset(int, Nlist *); +void	prhideset(int); +int	newhideset(int, Nlist *); +int	unionhideset(int, int); +void	iniths(void); +void	setobjname(char *); +#define	rowlen(tokrow)	((tokrow)->lp - (tokrow)->bp) + +char *basepath( char *fname ); + +extern	char *outbufp; +extern	Token	nltoken; +extern	Source *cursource; +extern	char *curtime; +extern	int incdepth; +extern	int ifdepth; +extern	int ifsatisfied[NIF]; +extern	int Mflag; +extern	int skipping; +extern	int verbose; +extern	int Cplusplus; +extern	Nlist *kwdefined; +extern	
Includelist includelist[NINCLUDE]; +extern	char wd[]; + +#ifndef _WIN32 +#include <unistd.h> +#else +#include <io.h> +#endif +#include <fcntl.h> diff --git a/src/tools/lcc/cpp/eval.c b/src/tools/lcc/cpp/eval.c new file mode 100644 index 0000000..95a9e11 --- /dev/null +++ b/src/tools/lcc/cpp/eval.c @@ -0,0 +1,524 @@ +#include <stdlib.h> +#include <string.h> +#include "cpp.h" + +#define	NSTAK	32 +#define	SGN	0 +#define	UNS	1 +#define	UND	2 + +#define	UNSMARK	0x1000 + +struct value { +	long	val; +	int	type; +}; + +/* conversion types */ +#define	RELAT	1 +#define	ARITH	2 +#define	LOGIC	3 +#define	SPCL	4 +#define	SHIFT	5 +#define	UNARY	6 + +/* operator priority, arity, and conversion type, indexed by tokentype */ +struct pri { +	char	pri; +	char	arity; +	char	ctype; +} priority[] = { +	{ 0, 0, 0 },		/* END */ +	{ 0, 0, 0 },		/* UNCLASS */ +	{ 0, 0, 0 },		/* NAME */ +	{ 0, 0, 0 },		/* NUMBER */ +	{ 0, 0, 0 },		/* STRING */ +	{ 0, 0, 0 },		/* CCON */ +	{ 0, 0, 0 },		/* NL */ +	{ 0, 0, 0 },		/* WS */ +	{ 0, 0, 0 },		/* DSHARP */ +	{ 11, 2, RELAT },	/* EQ */ +	{ 11, 2, RELAT },	/* NEQ */ +	{ 12, 2, RELAT },	/* LEQ */ +	{ 12, 2, RELAT },	/* GEQ */ +	{ 13, 2, SHIFT },	/* LSH */ +	{ 13, 2, SHIFT },	/* RSH */ +	{ 7, 2, LOGIC },	/* LAND */ +	{ 6, 2, LOGIC },	/* LOR */ +	{ 0, 0, 0 },		/* PPLUS */ +	{ 0, 0, 0 },		/* MMINUS */ +	{ 0, 0, 0 },		/* ARROW */ +	{ 0, 0, 0 },		/* SBRA */ +	{ 0, 0, 0 },		/* SKET */ +	{ 3, 0, 0 },		/* LP */ +	{ 3, 0, 0 },		/* RP */ +	{ 0, 0, 0 },		/* DOT */ +	{ 10, 2, ARITH },	/* AND */ +	{ 15, 2, ARITH },	/* STAR */ +	{ 14, 2, ARITH },	/* PLUS */ +	{ 14, 2, ARITH },	/* MINUS */ +	{ 16, 1, UNARY },	/* TILDE */ +	{ 16, 1, UNARY },	/* NOT */ +	{ 15, 2, ARITH },	/* SLASH */ +	{ 15, 2, ARITH },	/* PCT */ +	{ 12, 2, RELAT },	/* LT */ +	{ 12, 2, RELAT },	/* GT */ +	{ 9, 2, ARITH },	/* CIRC */ +	{ 8, 2, ARITH },	/* OR */ +	{ 5, 2, SPCL },		/* QUEST */ +	{ 5, 2, SPCL },		/* COLON */ +	{ 0, 0, 0 },		/* ASGN */ +	{ 4, 2, 0 },		/* COMMA */ +	{ 0, 0, 0 },		/* SHARP */ 
+	{ 0, 0, 0 },		/* SEMIC */ +	{ 0, 0, 0 },		/* CBRA */ +	{ 0, 0, 0 },		/* CKET */ +	{ 0, 0, 0 },		/* ASPLUS */ + 	{ 0, 0, 0 },		/* ASMINUS */ + 	{ 0, 0, 0 },		/* ASSTAR */ + 	{ 0, 0, 0 },		/* ASSLASH */ + 	{ 0, 0, 0 },		/* ASPCT */ + 	{ 0, 0, 0 },		/* ASCIRC */ + 	{ 0, 0, 0 },		/* ASLSH */ +	{ 0, 0, 0 },		/* ASRSH */ + 	{ 0, 0, 0 },		/* ASOR */ + 	{ 0, 0, 0 },		/* ASAND */ +	{ 0, 0, 0 },		/* ELLIPS */ +	{ 0, 0, 0 },		/* DSHARP1 */ +	{ 0, 0, 0 },		/* NAME1 */ +	{ 16, 1, UNARY },	/* DEFINED */ +	{ 16, 0, UNARY },	/* UMINUS */ +}; + +int	evalop(struct pri); +struct	value tokval(Token *); +struct value vals[NSTAK], *vp; +enum toktype ops[NSTAK], *op; + +/* + * Evaluate an #if #elif #ifdef #ifndef line.  trp->tp points to the keyword. + */ +long +eval(Tokenrow *trp, int kw) +{ +	Token *tp; +	Nlist *np; +	int ntok, rand; + +	trp->tp++; +	if (kw==KIFDEF || kw==KIFNDEF) { +		if (trp->lp - trp->bp != 4 || trp->tp->type!=NAME) { +			error(ERROR, "Syntax error in #ifdef/#ifndef"); +			return 0; +		} +		np = lookup(trp->tp, 0); +		return (kw==KIFDEF) == (np && np->flag&(ISDEFINED|ISMAC)); +	} +	ntok = trp->tp - trp->bp; +	kwdefined->val = KDEFINED;	/* activate special meaning of defined */ +	expandrow(trp, "<if>"); +	kwdefined->val = NAME; +	vp = vals; +	op = ops; +	*op++ = END; +	for (rand=0, tp = trp->bp+ntok; tp < trp->lp; tp++) { +		switch(tp->type) { +		case WS: +		case NL: +			continue; + +		/* nilary */ +		case NAME: +		case NAME1: +		case NUMBER: +		case CCON: +		case STRING: +			if (rand) +				goto syntax; +			*vp++ = tokval(tp); +			rand = 1; +			continue; + +		/* unary */ +		case DEFINED: +		case TILDE: +		case NOT: +			if (rand) +				goto syntax; +			*op++ = tp->type; +			continue; + +		/* unary-binary */ +		case PLUS: case MINUS: case STAR: case AND: +			if (rand==0) { +				if (tp->type==MINUS) +					*op++ = UMINUS; +				if (tp->type==STAR || tp->type==AND) { +					error(ERROR, "Illegal operator * or & in #if/#elsif"); +					return 0; +				} +				continue; +			} +	
		/* flow through */ + +		/* plain binary */ +		case EQ: case NEQ: case LEQ: case GEQ: case LSH: case RSH: +		case LAND: case LOR: case SLASH: case PCT: +		case LT: case GT: case CIRC: case OR: case QUEST: +		case COLON: case COMMA: +			if (rand==0) +				goto syntax; +			if (evalop(priority[tp->type])!=0) +				return 0; +			*op++ = tp->type; +			rand = 0; +			continue; + +		case LP: +			if (rand) +				goto syntax; +			*op++ = LP; +			continue; + +		case RP: +			if (!rand) +				goto syntax; +			if (evalop(priority[RP])!=0) +				return 0; +			if (op<=ops || op[-1]!=LP) { +				goto syntax; +			} +			op--; +			continue; + +		default: +			error(ERROR,"Bad operator (%t) in #if/#elsif", tp); +			return 0; +		} +	} +	if (rand==0) +		goto syntax; +	if (evalop(priority[END])!=0) +		return 0; +	if (op!=&ops[1] || vp!=&vals[1]) { +		error(ERROR, "Botch in #if/#elsif"); +		return 0; +	} +	if (vals[0].type==UND) +		error(ERROR, "Undefined expression value"); +	return vals[0].val; +syntax: +	error(ERROR, "Syntax error in #if/#elsif"); +	return 0; +} + +int +evalop(struct pri pri) +{ +	struct value v1, v2; +	long rv1, rv2; +	int rtype, oper; + +	/* prevent compiler whining. 
*/ +	v1.val = v2.val = 0; +	v1.type = v2.type = 0; + +	rv2=0; +	rtype=0; +	while (pri.pri < priority[op[-1]].pri) { +		oper = *--op; +		if (priority[oper].arity==2) { +			v2 = *--vp; +			rv2 = v2.val; +		} +		v1 = *--vp; +		rv1 = v1.val; +/*lint -e574 -e644 */ +		switch (priority[oper].ctype) { +		case 0: +		default: +			error(WARNING, "Syntax error in #if/#endif"); +			return 1; +		case ARITH: +		case RELAT: +			if (v1.type==UNS || v2.type==UNS) +				rtype = UNS; +			else +				rtype = SGN; +			if (v1.type==UND || v2.type==UND) +				rtype = UND; +			if (priority[oper].ctype==RELAT && rtype==UNS) { +				oper |= UNSMARK; +				rtype = SGN; +			} +			break; +		case SHIFT: +			if (v1.type==UND || v2.type==UND) +				rtype = UND; +			else +				rtype = v1.type; +			if (rtype==UNS) +				oper |= UNSMARK; +			break; +		case UNARY: +			rtype = v1.type; +			break; +		case LOGIC: +		case SPCL: +			break; +		} +		switch (oper) { +		case EQ: case EQ|UNSMARK: +			rv1 = rv1==rv2; break; +		case NEQ: case NEQ|UNSMARK: +			rv1 = rv1!=rv2; break; +		case LEQ: +			rv1 = rv1<=rv2; break; +		case GEQ: +			rv1 = rv1>=rv2; break; +		case LT: +			rv1 = rv1<rv2; break; +		case GT: +			rv1 = rv1>rv2; break; +		case LEQ|UNSMARK: +			rv1 = (unsigned long)rv1<=rv2; break; +		case GEQ|UNSMARK: +			rv1 = (unsigned long)rv1>=rv2; break; +		case LT|UNSMARK: +			rv1 = (unsigned long)rv1<rv2; break; +		case GT|UNSMARK: +			rv1 = (unsigned long)rv1>rv2; break; +		case LSH: +			rv1 <<= rv2; break; +		case LSH|UNSMARK: +			rv1 = (unsigned long)rv1<<rv2; break; +		case RSH: +			rv1 >>= rv2; break; +		case RSH|UNSMARK: +			rv1 = (unsigned long)rv1>>rv2; break; +		case LAND: +			rtype = UND; +			if (v1.type==UND) +				break; +			if (rv1!=0) { +				if (v2.type==UND) +					break; +				rv1 = rv2!=0; +			} else +				rv1 = 0; +			rtype = SGN; +			break; +		case LOR: +			rtype = UND; +			if (v1.type==UND) +				break; +			if (rv1==0) { +				if (v2.type==UND) +					break; +				rv1 = rv2!=0; +			} else +				rv1 = 1; 
+			rtype = SGN; +			break; +		case AND: +			rv1 &= rv2; break; +		case STAR: +			rv1 *= rv2; break; +		case PLUS: +			rv1 += rv2; break; +		case MINUS: +			rv1 -= rv2; break; +		case UMINUS: +			if (v1.type==UND) +				rtype = UND; +			rv1 = -rv1; break; +		case OR: +			rv1 |= rv2; break; +		case CIRC: +			rv1 ^= rv2; break; +		case TILDE: +			rv1 = ~rv1; break; +		case NOT: +			rv1 = !rv1; if (rtype!=UND) rtype = SGN; break; +		case SLASH: +			if (rv2==0) { +				rtype = UND; +				break; +			} +			if (rtype==UNS) +				rv1 /= (unsigned long)rv2; +			else +				rv1 /= rv2; +			break; +		case PCT: +			if (rv2==0) { +				rtype = UND; +				break; +			} +			if (rtype==UNS) +				rv1 %= (unsigned long)rv2; +			else +				rv1 %= rv2; +			break; +		case COLON: +			if (op[-1] != QUEST) +				error(ERROR, "Bad ?: in #if/endif"); +			else { +				op--; +				if ((--vp)->val==0) +					v1 = v2; +				rtype = v1.type; +				rv1 = v1.val; +			} +			break; +		case DEFINED: +			break; +		default: +			error(ERROR, "Eval botch (unknown operator)"); +			return 1; +		} +/*lint +e574 +e644 */ +		v1.val = rv1; +		v1.type = rtype; +		*vp++ = v1; +	} +	return 0; +} + +struct value +tokval(Token *tp) +{ +	struct value v; +	Nlist *np; +	int i, base, c; +	unsigned long n; +	uchar *p; + +	v.type = SGN; +	v.val = 0; +	switch (tp->type) { + +	case NAME: +		v.val = 0; +		break; + +	case NAME1: +		if ((np = lookup(tp, 0)) != NULL && np->flag&(ISDEFINED|ISMAC)) +			v.val = 1; +		break; + +	case NUMBER: +		n = 0; +		base = 10; +		p = tp->t; +		c = p[tp->len]; +		p[tp->len] = '\0'; +		if (*p=='0') { +			base = 8; +			if (p[1]=='x' || p[1]=='X') { +				base = 16; +				p++; +			} +			p++; +		} +		for (;; p++) { +			if ((i = digit(*p)) < 0) +				break; +			if (i>=base) +				error(WARNING, +				  "Bad digit in number %t", tp); +			n *= base; +			n += i; +		} +		if (n>=0x80000000 && base!=10) +			v.type = UNS; +		for (; *p; p++) { +			if (*p=='u' || *p=='U') +				v.type = UNS; +			else if (*p=='l' || *p=='L') +				
; +			else { +				error(ERROR, +				  "Bad number %t in #if/#elsif", tp); +				break; +			} +		} +		v.val = n; +		tp->t[tp->len] = c; +		break; + +	case CCON: +		n = 0; +		p = tp->t; +		if (*p=='L') { +			p += 1; +			error(WARNING, "Wide char constant value undefined"); +		} +		p += 1; +		if (*p=='\\') { +			p += 1; +			if ((i = digit(*p))>=0 && i<=7) { +				n = i; +				p += 1; +				if ((i = digit(*p))>=0 && i<=7) { +					p += 1; +					n <<= 3; +					n += i; +					if ((i = digit(*p))>=0 && i<=7) { +						p += 1; +						n <<= 3; +						n += i; +					} +				} +			} else if (*p=='x') { +				p += 1; +				while ((i = digit(*p))>=0 && i<=15) { +					p += 1; +					n <<= 4; +					n += i; +				} +			} else { +				static char cvcon[] +				  = "b\bf\fn\nr\rt\tv\v''\"\"??\\\\"; +				for (i=0; i<sizeof(cvcon); i+=2) { +					if (*p == cvcon[i]) { +						n = cvcon[i+1]; +						break; +					} +				} +				p += 1; +				if (i>=sizeof(cvcon)) +					error(WARNING, +					 "Undefined escape in character constant"); +			} +		} else if (*p=='\'') +			error(ERROR, "Empty character constant"); +		else +			n = *p++; +		if (*p!='\'') +			error(WARNING, "Multibyte character constant undefined"); +		else if (n>127) +			error(WARNING, "Character constant taken as not signed"); +		v.val = n; +		break; + +	case STRING: +		error(ERROR, "String in #if/#elsif"); +		break; +	} +	return v; +} + +int +digit(int i) +{ +	if ('0'<=i && i<='9') +		i -= '0'; +	else if ('a'<=i && i<='f') +		i -= 'a'-10; +	else if ('A'<=i && i<='F') +		i -= 'A'-10; +	else +		i = -1; +	return i; +} diff --git a/src/tools/lcc/cpp/getopt.c b/src/tools/lcc/cpp/getopt.c new file mode 100644 index 0000000..c4d1af7 --- /dev/null +++ b/src/tools/lcc/cpp/getopt.c @@ -0,0 +1,53 @@ +#include	<stdio.h> +#include	<string.h> +#define EPR                 fprintf(stderr, +#define ERR(str, chr)       if(opterr){EPR "%s%c\n", str, chr);} +int     opterr = 1; +int     optind = 1; +int	optopt; +char    *optarg; + +int +lcc_getopt (int argc, char 
*const argv[], const char *opts) +{ +	static int sp = 1; +	int c; +	char *cp; + +	if (sp == 1) { +		if (optind >= argc || +		   argv[optind][0] != '-' || argv[optind][1] == '\0') +			return -1; +		else if (strcmp(argv[optind], "--") == 0) { +			optind++; +			return -1; +		} +	} +	optopt = c = argv[optind][sp]; +	if (c == ':' || (cp=strchr(opts, c)) == 0) { +		ERR (": illegal option -- ", c); +		if (argv[optind][++sp] == '\0') { +			optind++; +			sp = 1; +		} +		return '?'; +	} +	if (*++cp == ':') { +		if (argv[optind][sp+1] != '\0') +			optarg = &argv[optind++][sp+1]; +		else if (++optind >= argc) { +			ERR (": option requires an argument -- ", c); +			sp = 1; +			return '?'; +		} else +			optarg = argv[optind++]; +		sp = 1; +	} else { +		if (argv[optind][++sp] == '\0') { +			sp = 1; +			optind++; +		} +		optarg = 0; +	} +	return c; +} diff --git a/src/tools/lcc/cpp/hideset.c b/src/tools/lcc/cpp/hideset.c new file mode 100644 index 0000000..bd2540d --- /dev/null +++ b/src/tools/lcc/cpp/hideset.c @@ -0,0 +1,112 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "cpp.h" + +/* + * A hideset is a null-terminated array of Nlist pointers. + * They are referred to by indices in the hidesets array. + * Hideset 0 is empty. + */ + +#define	HSSIZ	32 +typedef	Nlist	**Hideset; +Hideset	*hidesets; +int	nhidesets = 0; +int	maxhidesets = 3; +int	inserths(Hideset, Hideset, Nlist *); + +/* + * Test for membership in a hideset + */ +int +checkhideset(int hs, Nlist *np) +{ +	Hideset hsp; + +	if (hs>=nhidesets) +		abort(); +	for (hsp = hidesets[hs]; *hsp; hsp++) { +		if (*hsp == np) +			return 1; +	} +	return 0; +} + +/* + * Return the (possibly new) hideset obtained by adding np to hs. 
+ */ +int +newhideset(int hs, Nlist *np) +{ +	int i, len; +	Nlist *nhs[HSSIZ+3]; +	Hideset hs1, hs2; + +	len = inserths(nhs, hidesets[hs], np); +	for (i=0; i<nhidesets; i++) { +		for (hs1=nhs, hs2=hidesets[i]; *hs1==*hs2; hs1++, hs2++) +			if (*hs1 == NULL) +				return i; +	} +	if (len>=HSSIZ) +		return hs; +	if (nhidesets >= maxhidesets) { +		maxhidesets = 3*maxhidesets/2+1; +		hidesets = (Hideset *)realloc(hidesets, (sizeof (Hideset *))*maxhidesets); +		if (hidesets == NULL) +			error(FATAL, "Out of memory from realloc"); +	} +	hs1 = (Hideset)domalloc(len*sizeof(Hideset)); +	memmove(hs1, nhs, len*sizeof(Hideset)); +	hidesets[nhidesets] = hs1; +	return nhidesets++; +} + +int +inserths(Hideset dhs, Hideset shs, Nlist *np) +{ +	Hideset odhs = dhs; + +	while (*shs && *shs < np) +		*dhs++ = *shs++; +	if (*shs != np) +		*dhs++ = np; +	do { +		*dhs++ = *shs; +	} while (*shs++); +	return dhs - odhs; +} + +/* + * Hideset union + */ +int +unionhideset(int hs1, int hs2) +{ +	Hideset hp; + +	for (hp = hidesets[hs2]; *hp; hp++) +		hs1 = newhideset(hs1, *hp); +	return hs1; +} + +void +iniths(void) +{ +	hidesets = (Hideset *)domalloc(maxhidesets*sizeof(Hideset *)); +	hidesets[0] = (Hideset)domalloc(sizeof(Hideset)); +	*hidesets[0] = NULL; +	nhidesets++; +} + +void +prhideset(int hs) +{ +	Hideset np; + +	for (np = hidesets[hs]; *np; np++) { +		fprintf(stderr, (char*)(*np)->name, (*np)->len); +		fprintf(stderr, " "); +	} +} diff --git a/src/tools/lcc/cpp/include.c b/src/tools/lcc/cpp/include.c new file mode 100644 index 0000000..5ecd8b3 --- /dev/null +++ b/src/tools/lcc/cpp/include.c @@ -0,0 +1,153 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "cpp.h" + +Includelist	includelist[NINCLUDE]; + +extern char	*objname; + +void appendDirToIncludeList( char *dir ) +{ +	int i; +	char *fqdir; + +	fqdir = (char *)newstring( (uchar *)includelist[NINCLUDE-1].file, 256, 0 ); +	strcat( fqdir, "/" ); +	strcat( fqdir, dir ); + +	//avoid adding it more than once +	for 
(i=NINCLUDE-2; i>=0; i--) { +		if (includelist[i].file && +				!strcmp (includelist[i].file, fqdir)) { +			return; +		} +	} + +	for (i=NINCLUDE-2; i>=0; i--) { +		if (includelist[i].file==NULL) { +			includelist[i].always = 1; +			includelist[i].file = fqdir; +			break; +		} +	} +	if (i<0) +		error(FATAL, "Too many -I directives"); +} + +void +doinclude(Tokenrow *trp) +{ +	char fname[256], iname[256]; +	Includelist *ip; +	int angled, len, fd, i; + +	trp->tp += 1; +	if (trp->tp>=trp->lp) +		goto syntax; +	if (trp->tp->type!=STRING && trp->tp->type!=LT) { +		len = trp->tp - trp->bp; +		expandrow(trp, "<include>"); +		trp->tp = trp->bp+len; +	} +	if (trp->tp->type==STRING) { +		len = trp->tp->len-2; +		if (len > sizeof(fname) - 1) +			len = sizeof(fname) - 1; +		strncpy(fname, (char*)trp->tp->t+1, len); +		angled = 0; +	} else if (trp->tp->type==LT) { +		len = 0; +		trp->tp++; +		while (trp->tp->type!=GT) { +			if (trp->tp>trp->lp || len+trp->tp->len+2 >= sizeof(fname)) +				goto syntax; +			strncpy(fname+len, (char*)trp->tp->t, trp->tp->len); +			len += trp->tp->len; +			trp->tp++; +		} +		angled = 1; +	} else +		goto syntax; +	trp->tp += 2; +	if (trp->tp < trp->lp || len==0) +		goto syntax; +	fname[len] = '\0'; + +	appendDirToIncludeList( basepath( fname ) ); + +	if (fname[0]=='/') { +		fd = open(fname, 0); +		strcpy(iname, fname); +	} else for (fd = -1,i=NINCLUDE-1; i>=0; i--) { +		ip = &includelist[i]; +		if (ip->file==NULL || ip->deleted || (angled && ip->always==0)) +			continue; +		if (strlen(fname)+strlen(ip->file)+2 > sizeof(iname)) +			continue; +		strcpy(iname, ip->file); +		strcat(iname, "/"); +		strcat(iname, fname); +		if ((fd = open(iname, 0)) >= 0) +			break; +	} +	if ( Mflag>1 || (!angled&&Mflag==1) ) { +		write(1,objname,strlen(objname)); +		write(1,iname,strlen(iname)); +		write(1,"\n",1); +	} +	if (fd >= 0) { +		if (++incdepth > 10) +			error(FATAL, "#include too deeply nested"); +		setsource((char*)newstring((uchar*)iname, strlen(iname), 0), fd, 
NULL); +		genline(); +	} else { +		trp->tp = trp->bp+2; +		error(ERROR, "Could not find include file %r", trp); +	} +	return; +syntax: +	error(ERROR, "Syntax error in #include"); +} + +/* + * Generate a line directive for cursource + */ +void +genline(void) +{ +	static Token ta = { UNCLASS }; +	static Tokenrow tr = { &ta, &ta, &ta+1, 1 }; +	uchar *p; + +	ta.t = p = (uchar*)outbufp; +	strcpy((char*)p, "#line "); +	p += sizeof("#line ")-1; +	p = (uchar*)outnum((char*)p, cursource->line); +	*p++ = ' '; *p++ = '"'; +	if (cursource->filename[0]!='/' && wd[0]) { +		strcpy((char*)p, wd); +		p += strlen(wd); +		*p++ = '/'; +	} +	strcpy((char*)p, cursource->filename); +	p += strlen((char*)p); +	*p++ = '"'; *p++ = '\n'; +	ta.len = (char*)p-outbufp; +	outbufp = (char*)p; +	tr.tp = tr.bp; +	puttokens(&tr); +} + +void +setobjname(char *f) +{ +	int n = strlen(f); +	objname = (char*)domalloc(n+5); +	strcpy(objname,f); +	if(objname[n-2]=='.'){ +		strcpy(objname+n-1,"$O: "); +	}else{ +		strcpy(objname+n,"$O: "); +	} +} diff --git a/src/tools/lcc/cpp/lex.c b/src/tools/lcc/cpp/lex.c new file mode 100644 index 0000000..8030354 --- /dev/null +++ b/src/tools/lcc/cpp/lex.c @@ -0,0 +1,580 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "cpp.h" + +/* + * lexical FSM encoding + *   when in state state, and one of the characters + *   in ch arrives, enter nextstate. + *   States >= S_SELF are either final, or at least require special action. + *   In 'fsm' there is a line for each state X charset X nextstate. + *   List chars that overwrite previous entries later (e.g. C_ALPH + *   can be overridden by '_' by a later entry; and C_XX is the + *   the universal set, and should always be first. + *   States above S_SELF are represented in the big table as negative values. + *   S_SELF and S_SELFB encode the resulting token type in the upper bits. + *   These actions differ in that S_SELF doesn't have a lookahead char, + *   S_SELFB does. 
+ * + *   The encoding is blown out into a big table for time-efficiency. + *   Entries have + *      nextstate: 6 bits; ?\ marker: 1 bit; tokentype: 9 bits. + */ + +#define	MAXSTATE 32 +#define	ACT(tok,act)	((tok<<7)+act) +#define	QBSBIT	0100 +#define	GETACT(st)	(st>>7)&0x1ff + +/* character classes */ +#define	C_WS	1 +#define	C_ALPH	2 +#define	C_NUM	3 +#define	C_EOF	4 +#define	C_XX	5 + +enum state { +	START=0, NUM1, NUM2, NUM3, ID1, ST1, ST2, ST3, COM1, COM2, COM3, COM4, +	CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1, +	CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1, +	S_SELF=MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR, +	S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME +}; + +int	tottok; +int	tokkind[256]; +struct	fsm { +	int	state;		/* if in this state */ +	uchar	ch[4];		/* and see one of these characters */ +	int	nextstate;	/* enter this state if +ve */ +}; + +/*const*/ struct fsm fsm[] = { +	/* start state */ +	{START,	{ C_XX },	ACT(UNCLASS,S_SELF)}, +	{START,	{ ' ', '\t', '\v' },	WS1}, +	{START,	{ C_NUM },	NUM1}, +	{START,	{ '.' },	NUM3}, +	{START,	{ C_ALPH },	ID1}, +	{START,	{ 'L' },	ST1}, +	{START,	{ '"' },	ST2}, +	{START,	{ '\'' },	CC1}, +	{START,	{ '/' },	COM1}, +	{START,	{ EOFC },	S_EOF}, +	{START,	{ '\n' },	S_NL}, +	{START,	{ '-' },	MINUS1}, +	{START,	{ '+' },	PLUS1}, +	{START,	{ '<' },	LT1}, +	{START,	{ '>' },	GT1}, +	{START,	{ '=' },	ASG1}, +	{START,	{ '!' },	NOT1}, +	{START,	{ '&' },	AND1}, +	{START,	{ '|' },	OR1}, +	{START,	{ '#' },	SHARP1}, +	{START,	{ '%' },	PCT1}, +	{START,	{ '[' },	ACT(SBRA,S_SELF)}, +	{START,	{ ']' },	ACT(SKET,S_SELF)}, +	{START,	{ '(' },	ACT(LP,S_SELF)}, +	{START,	{ ')' },	ACT(RP,S_SELF)}, +	{START,	{ '*' },	STAR1}, +	{START,	{ ',' },	ACT(COMMA,S_SELF)}, +	{START,	{ '?' 
},	ACT(QUEST,S_SELF)}, +	{START,	{ ':' },	ACT(COLON,S_SELF)}, +	{START,	{ ';' },	ACT(SEMIC,S_SELF)}, +	{START,	{ '{' },	ACT(CBRA,S_SELF)}, +	{START,	{ '}' },	ACT(CKET,S_SELF)}, +	{START,	{ '~' },	ACT(TILDE,S_SELF)}, +	{START,	{ '^' },	CIRC1}, + +	/* saw a digit */ +	{NUM1,	{ C_XX },	ACT(NUMBER,S_SELFB)}, +	{NUM1,	{ C_NUM, C_ALPH, '.' },	NUM1}, +	{NUM1,	{ 'E', 'e' },	NUM2}, +	{NUM1,	{ '_' },	ACT(NUMBER,S_SELFB)}, + +	/* saw possible start of exponent, digits-e */ +	{NUM2,	{ C_XX },	ACT(NUMBER,S_SELFB)}, +	{NUM2,	{ '+', '-' },	NUM1}, +	{NUM2,	{ C_NUM, C_ALPH },	NUM1}, +	{NUM2,	{ '_' },	ACT(NUMBER,S_SELFB)}, + +	/* saw a '.', which could be a number or an operator */ +	{NUM3,	{ C_XX },	ACT(DOT,S_SELFB)}, +	{NUM3,	{ '.' },	DOTS1}, +	{NUM3,	{ C_NUM },	NUM1}, + +	{DOTS1,	{ C_XX },	ACT(UNCLASS, S_SELFB)}, +	{DOTS1,	{ C_NUM },	NUM1}, +	{DOTS1,	{ '.' },	ACT(ELLIPS, S_SELF)}, + +	/* saw a letter or _ */ +	{ID1,	{ C_XX },	ACT(NAME,S_NAME)}, +	{ID1,	{ C_ALPH, C_NUM },	ID1}, + +	/* saw L (start of wide string?) 
*/ +	{ST1,	{ C_XX },	ACT(NAME,S_NAME)}, +	{ST1,	{ C_ALPH, C_NUM },	ID1}, +	{ST1,	{ '"' },	ST2}, +	{ST1,	{ '\'' },	CC1}, + +	/* saw " beginning string */ +	{ST2,	{ C_XX },	ST2}, +	{ST2,	{ '"' },	ACT(STRING, S_SELF)}, +	{ST2,	{ '\\' },	ST3}, +	{ST2,	{ '\n' },	S_STNL}, +	{ST2,	{ EOFC },	S_EOFSTR}, + +	/* saw \ in string */ +	{ST3,	{ C_XX },	ST2}, +	{ST3,	{ '\n' },	S_STNL}, +	{ST3,	{ EOFC },	S_EOFSTR}, + +	/* saw ' beginning character const */ +	{CC1,	{ C_XX },	CC1}, +	{CC1,	{ '\'' },	ACT(CCON, S_SELF)}, +	{CC1,	{ '\\' },	CC2}, +	{CC1,	{ '\n' },	S_STNL}, +	{CC1,	{ EOFC },	S_EOFSTR}, + +	/* saw \ in ccon */ +	{CC2,	{ C_XX },	CC1}, +	{CC2,	{ '\n' },	S_STNL}, +	{CC2,	{ EOFC },	S_EOFSTR}, + +	/* saw /, perhaps start of comment */ +	{COM1,	{ C_XX },	ACT(SLASH, S_SELFB)}, +	{COM1,	{ '=' },	ACT(ASSLASH, S_SELF)}, +	{COM1,	{ '*' },	COM2}, +	{COM1,	{ '/' },	COM4}, + +	/* saw / then *, start of comment */ +	{COM2,	{ C_XX },	COM2}, +	{COM2,	{ '\n' },	S_COMNL}, +	{COM2,	{ '*' },	COM3}, +	{COM2,	{ EOFC },	S_EOFCOM}, + +	/* saw the * possibly ending a comment */ +	{COM3,	{ C_XX },	COM2}, +	{COM3,	{ '\n' },	S_COMNL}, +	{COM3,	{ '*' },	COM3}, +	{COM3,	{ '/' },	S_COMMENT}, + +	/* // comment */ +	{COM4,	{ C_XX },	COM4}, +	{COM4,	{ '\n' },	S_NL}, +	{COM4,	{ EOFC },	S_EOFCOM}, + +	/* saw white space, eat it up */ +	{WS1,	{ C_XX },	S_WS}, +	{WS1,	{ ' ', '\t', '\v' },	WS1}, + +	/* saw -, check --, -=, -> */ +	{MINUS1,	{ C_XX },	ACT(MINUS, S_SELFB)}, +	{MINUS1,	{ '-' },	ACT(MMINUS, S_SELF)}, +	{MINUS1,	{ '=' },	ACT(ASMINUS,S_SELF)}, +	{MINUS1,	{ '>' },	ACT(ARROW,S_SELF)}, + +	/* saw +, check ++, += */ +	{PLUS1,	{ C_XX },	ACT(PLUS, S_SELFB)}, +	{PLUS1,	{ '+' },	ACT(PPLUS, S_SELF)}, +	{PLUS1,	{ '=' },	ACT(ASPLUS, S_SELF)}, + +	/* saw <, check <<, <<=, <= */ +	{LT1,	{ C_XX },	ACT(LT, S_SELFB)}, +	{LT1,	{ '<' },	LT2}, +	{LT1,	{ '=' },	ACT(LEQ, S_SELF)}, +	{LT2,	{ C_XX },	ACT(LSH, S_SELFB)}, +	{LT2,	{ '=' },	ACT(ASLSH, S_SELF)}, + +	/* saw >, check >>, >>=, >= */ +	{GT1,	{ C_XX },	ACT(GT, 
S_SELFB)}, +	{GT1,	{ '>' },	GT2}, +	{GT1,	{ '=' },	ACT(GEQ, S_SELF)}, +	{GT2,	{ C_XX },	ACT(RSH, S_SELFB)}, +	{GT2,	{ '=' },	ACT(ASRSH, S_SELF)}, + +	/* = */ +	{ASG1,	{ C_XX },	ACT(ASGN, S_SELFB)}, +	{ASG1,	{ '=' },	ACT(EQ, S_SELF)}, + +	/* ! */ +	{NOT1,	{ C_XX },	ACT(NOT, S_SELFB)}, +	{NOT1,	{ '=' },	ACT(NEQ, S_SELF)}, + +	/* & */ +	{AND1,	{ C_XX },	ACT(AND, S_SELFB)}, +	{AND1,	{ '&' },	ACT(LAND, S_SELF)}, +	{AND1,	{ '=' },	ACT(ASAND, S_SELF)}, + +	/* | */ +	{OR1,	{ C_XX },	ACT(OR, S_SELFB)}, +	{OR1,	{ '|' },	ACT(LOR, S_SELF)}, +	{OR1,	{ '=' },	ACT(ASOR, S_SELF)}, + +	/* # */ +	{SHARP1,	{ C_XX },	ACT(SHARP, S_SELFB)}, +	{SHARP1,	{ '#' },	ACT(DSHARP, S_SELF)}, + +	/* % */ +	{PCT1,	{ C_XX },	ACT(PCT, S_SELFB)}, +	{PCT1,	{ '=' },	ACT(ASPCT, S_SELF)}, + +	/* * */ +	{STAR1,	{ C_XX },	ACT(STAR, S_SELFB)}, +	{STAR1,	{ '=' },	ACT(ASSTAR, S_SELF)}, + +	/* ^ */ +	{CIRC1,	{ C_XX },	ACT(CIRC, S_SELFB)}, +	{CIRC1,	{ '=' },	ACT(ASCIRC, S_SELF)}, + +	{-1} +}; + +/* first index is char, second is state */ +/* increase #states to power of 2 to encourage use of shift */ +short	bigfsm[256][MAXSTATE]; + +void +expandlex(void) +{ +	/*const*/ struct fsm *fp; +	int i, j, nstate; + +	for (fp = fsm; fp->state>=0; fp++) { +		for (i=0; fp->ch[i]; i++) { +			nstate = fp->nextstate; +			if (nstate >= S_SELF) +				nstate = ~nstate; +			switch (fp->ch[i]) { + +			case C_XX:		/* random characters */ +				for (j=0; j<256; j++) +					bigfsm[j][fp->state] = nstate; +				continue; +			case C_ALPH: +				for (j=0; j<=256; j++) +					if (('a'<=j&&j<='z') || ('A'<=j&&j<='Z') +					  || j=='_') +						bigfsm[j][fp->state] = nstate; +				continue; +			case C_NUM: +				for (j='0'; j<='9'; j++) +					bigfsm[j][fp->state] = nstate; +				continue; +			default: +				bigfsm[fp->ch[i]][fp->state] = nstate; +			} +		} +	} +	/* install special cases for ? (trigraphs),  \ (splicing), runes, and EOB */ +	for (i=0; i<MAXSTATE; i++) { +		for (j=0; j<0xFF; j++) +			if (j=='?' 
|| j=='\\') { +				if (bigfsm[j][i]>0) +					bigfsm[j][i] = ~bigfsm[j][i]; +				bigfsm[j][i] &= ~QBSBIT; +			} +		bigfsm[EOB][i] = ~S_EOB; +		if (bigfsm[EOFC][i]>=0) +			bigfsm[EOFC][i] = ~S_EOF; +	} +} + +void +fixlex(void) +{ +	/* do C++ comments? */ +	if (Cplusplus==0) +		bigfsm['/'][COM1] = bigfsm['x'][COM1]; +} + +/* + * fill in a row of tokens from input, terminated by NL or END + * First token is put at trp->lp. + * Reset is non-zero when the input buffer can be "rewound." + * The value is a flag indicating that possible macros have + * been seen in the row. + */ +int +gettokens(Tokenrow *trp, int reset) +{ +	register int c, state, oldstate; +	register uchar *ip; +	register Token *tp, *maxp; +	int runelen; +	Source *s = cursource; +	int nmac = 0; + +	tp = trp->lp; +	ip = s->inp; +	if (reset) { +		s->lineinc = 0; +		if (ip>=s->inl) {		/* nothing in buffer */ +			s->inl = s->inb; +			fillbuf(s); +			ip = s->inp = s->inb; +		} else if (ip >= s->inb+(3*INS/4)) { +			memmove(s->inb, ip, 4+s->inl-ip); +			s->inl = s->inb+(s->inl-ip); +			ip = s->inp = s->inb; +		} +	} +	maxp = &trp->bp[trp->max]; +	runelen = 1; +	for (;;) { +	   continue2: +		if (tp>=maxp) { +			trp->lp = tp; +			tp = growtokenrow(trp); +			maxp = &trp->bp[trp->max]; +		} +		tp->type = UNCLASS; +		tp->hideset = 0; +		tp->t = ip; +		tp->wslen = 0; +		tp->flag = 0; +		state = START; +		for (;;) { +			oldstate = state; +			c = *ip; +			if ((state = bigfsm[c][state]) >= 0) { +				ip += runelen; +				runelen = 1; +				continue; +			} +			state = ~state; +		reswitch: +			switch (state&0177) { +			case S_SELF: +				ip += runelen; +				runelen = 1; +			case S_SELFB: +				tp->type = GETACT(state); +				tp->len = ip - tp->t; +				tp++; +				goto continue2; + +			case S_NAME:	/* like S_SELFB but with nmac check */ +				tp->type = NAME; +				tp->len = ip - tp->t; +				nmac |= quicklook(tp->t[0], tp->len>1?tp->t[1]:0); +				tp++; +				goto continue2; + +			case S_WS: +				tp->wslen = ip - tp->t; +				tp->t = 
ip; +				state = START; +				continue; + +			default: +				if ((state&QBSBIT)==0) { +					ip += runelen; +					runelen = 1; +					continue; +				} +				state &= ~QBSBIT; +				s->inp = ip; +				if (c=='?') { 	/* check trigraph */ +					if (trigraph(s)) { +						state = oldstate; +						continue; +					} +					goto reswitch; +				} +				if (c=='\\') { /* line-folding */ +					if (foldline(s)) { +						s->lineinc++; +						state = oldstate; +						continue; +					} +					goto reswitch; +				} +				error(WARNING, "Lexical botch in cpp"); +				ip += runelen; +				runelen = 1; +				continue; + +			case S_EOB: +				s->inp = ip; +				fillbuf(cursource); +				state = oldstate; +				continue; + +			case S_EOF: +				tp->type = END; +				tp->len = 0; +				s->inp = ip; +				if (tp!=trp->bp && (tp-1)->type!=NL && cursource->fd!=-1) +					error(WARNING,"No newline at end of file"); +				trp->lp = tp+1; +				return nmac; + +			case S_STNL: +				error(ERROR, "Unterminated string or char const"); +			case S_NL: +				tp->t = ip; +				tp->type = NL; +				tp->len = 1; +				tp->wslen = 0; +				s->lineinc++; +				s->inp = ip+1; +				trp->lp = tp+1; +				return nmac; + +			case S_EOFSTR: +				error(FATAL, "EOF in string or char constant"); +				break; + +			case S_COMNL: +				s->lineinc++; +				state = COM2; +				ip += runelen; +				runelen = 1; +				if (ip >= s->inb+(7*INS/8)) { /* very long comment */ +					memmove(tp->t, ip, 4+s->inl-ip); +					s->inl -= ip-tp->t; +					ip = tp->t+1; +				} +				continue; + +			case S_EOFCOM: +				error(WARNING, "EOF inside comment"); +				--ip; +			case S_COMMENT: +				++ip; +				tp->t = ip; +				tp->t[-1] = ' '; +				tp->wslen = 1; +				state = START; +				continue; +			} +			break; +		} +		ip += runelen; +		runelen = 1; +		tp->len = ip - tp->t; +		tp++; +	} +} + +/* have seen ?; handle the trigraph it starts (if any) else 0 */ +int +trigraph(Source *s) +{ +	int c; + +	while (s->inp+2 >= s->inl && fillbuf(s)!=EOF) +		; +	if (s->inp[1]!='?') +		return 
0; +	c = 0; +	switch(s->inp[2]) { +	case '=': +		c = '#'; break; +	case '(': +		c = '['; break; +	case '/': +		c = '\\'; break; +	case ')': +		c = ']'; break; +	case '\'': +		c = '^'; break; +	case '<': +		c = '{'; break; +	case '!': +		c = '|'; break; +	case '>': +		c = '}'; break; +	case '-': +		c = '~'; break; +	} +	if (c) { +		*s->inp = c; +		memmove(s->inp+1, s->inp+3, s->inl-s->inp+2); +		s->inl -= 2; +	} +	return c; +} + +int +foldline(Source *s) +{ +	while (s->inp+1 >= s->inl && fillbuf(s)!=EOF) +		; +	if (s->inp[1] == '\n') { +		memmove(s->inp, s->inp+2, s->inl-s->inp+3); +		s->inl -= 2; +		return 1; +	} +	return 0; +} + +int +fillbuf(Source *s) +{ +	int n, nr; + +	nr = INS/8; +	if ((char *)s->inl+nr > (char *)s->inb+INS) +		error(FATAL, "Input buffer overflow"); +	if (s->fd<0 || (n=read(s->fd, (char *)s->inl, INS/8)) <= 0) +		n = 0; +	if ((*s->inp&0xff) == EOB) /* sentinel character appears in input */ +		*s->inp = EOFC; +	s->inl += n; +	s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOB; +	if (n==0) { +		s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOFC; +		return EOF; +	} +	return 0; +} + +/* + * Push down to new source of characters. + * If fd>0 and str==NULL, then from a file `name'; + * if fd==-1 and str, then from the string. 
+ */ +Source * +setsource(char *name, int fd, char *str) +{ +	Source *s = new(Source); +	int len; + +	s->line = 1; +	s->lineinc = 0; +	s->fd = fd; +	s->filename = name; +	s->next = cursource; +	s->ifdepth = 0; +	cursource = s; +	/* slop at right for EOB */ +	if (str) { +		len = strlen(str); +		s->inb = domalloc(len+4); +		s->inp = s->inb; +		strncpy((char *)s->inp, str, len); +	} else { +		s->inb = domalloc(INS+4); +		s->inp = s->inb; +		len = 0; +	} +	s->inl = s->inp+len; +	s->inl[0] = s->inl[1] = EOB; +	return s; +} + +void +unsetsource(void) +{ +	Source *s = cursource; + +	if (s->fd>=0) { +		close(s->fd); +		dofree(s->inb); +	} +	cursource = s->next; +	dofree(s); +} diff --git a/src/tools/lcc/cpp/macro.c b/src/tools/lcc/cpp/macro.c new file mode 100644 index 0000000..2972083 --- /dev/null +++ b/src/tools/lcc/cpp/macro.c @@ -0,0 +1,514 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "cpp.h" + +/* + * do a macro definition.  tp points to the name being defined in the line + */ +void +dodefine(Tokenrow *trp) +{ +	Token *tp; +	Nlist *np; +	Tokenrow *def, *args; + +	tp = trp->tp+1; +	if (tp>=trp->lp || tp->type!=NAME) { +		error(ERROR, "#defined token is not a name"); +		return; +	} +	np = lookup(tp, 1); +	if (np->flag&ISUNCHANGE) { +		error(ERROR, "#defined token %t can't be redefined", tp); +		return; +	} +	/* collect arguments */ +	tp += 1; +	args = NULL; +	if (tp<trp->lp && tp->type==LP && tp->wslen==0) { +		/* macro with args */ +		int narg = 0; +		tp += 1; +		args = new(Tokenrow); +		maketokenrow(2, args); +		if (tp->type!=RP) { +			int err = 0; +			for (;;) { +				Token *atp; +				if (tp->type!=NAME) { +					err++; +					break; +				} +				if (narg>=args->max) +					growtokenrow(args); +				for (atp=args->bp; atp<args->lp; atp++) +					if (atp->len==tp->len +					 && strncmp((char*)atp->t, (char*)tp->t, tp->len)==0) +						error(ERROR, "Duplicate macro argument"); +				*args->lp++ = *tp; +				narg++; +				tp += 1; +				if 
(tp->type==RP) +					break; +				if (tp->type!=COMMA) { +					err++; +					break; +				} +				tp += 1; +			} +			if (err) { +				error(ERROR, "Syntax error in macro parameters"); +				return; +			} +		} +		tp += 1; +	} +	trp->tp = tp; +	if (((trp->lp)-1)->type==NL) +		trp->lp -= 1; +	def = normtokenrow(trp); +	if (np->flag&ISDEFINED) { +		if (comparetokens(def, np->vp) +		 || (np->ap==NULL) != (args==NULL) +		 || (np->ap && comparetokens(args, np->ap))) +			error(ERROR, "Macro redefinition of %t", trp->bp+2); +	} +	if (args) { +		Tokenrow *tap; +		tap = normtokenrow(args); +		dofree(args->bp); +		args = tap; +	} +	np->ap = args; +	np->vp = def; +	np->flag |= ISDEFINED; +} + +/* + * Definition received via -D or -U + */ +void +doadefine(Tokenrow *trp, int type) +{ +	Nlist *np; +	static Token onetoken[1] = {{ NUMBER, 0, 0, 0, 1, (uchar*)"1" }}; +	static Tokenrow onetr = { onetoken, onetoken, onetoken+1, 1 }; + +	trp->tp = trp->bp; +	if (type=='U') { +		if (trp->lp-trp->tp != 2 || trp->tp->type!=NAME) +			goto syntax; +		if ((np = lookup(trp->tp, 0)) == NULL) +			return; +		np->flag &= ~ISDEFINED; +		return; +	} +	if (trp->tp >= trp->lp || trp->tp->type!=NAME) +		goto syntax; +	np = lookup(trp->tp, 1); +	np->flag |= ISDEFINED; +	trp->tp += 1; +	if (trp->tp >= trp->lp || trp->tp->type==END) { +		np->vp = &onetr; +		return; +	} +	if (trp->tp->type!=ASGN) +		goto syntax; +	trp->tp += 1; +	if ((trp->lp-1)->type == END) +		trp->lp -= 1; +	np->vp = normtokenrow(trp); +	return; +syntax: +	error(FATAL, "Illegal -D or -U argument %r", trp); +} +			 +/* + * Do macro expansion in a row of tokens. + * Flag is NULL if more input can be gathered. 
+ */ +void +expandrow(Tokenrow *trp, char *flag) +{ +	Token *tp; +	Nlist *np; + +	if (flag) +		setsource(flag, -1, ""); +	for (tp = trp->tp; tp<trp->lp; ) { +		if (tp->type!=NAME +		 || quicklook(tp->t[0], tp->len>1?tp->t[1]:0)==0 +		 || (np = lookup(tp, 0))==NULL +		 || (np->flag&(ISDEFINED|ISMAC))==0 +		 || (tp->hideset && checkhideset(tp->hideset, np))) { +			tp++; +			continue; +		} +		trp->tp = tp; +		if (np->val==KDEFINED) { +			tp->type = DEFINED; +			if ((tp+1)<trp->lp && (tp+1)->type==NAME) +				(tp+1)->type = NAME1; +			else if ((tp+3)<trp->lp && (tp+1)->type==LP +			 && (tp+2)->type==NAME && (tp+3)->type==RP) +				(tp+2)->type = NAME1; +			else +				error(ERROR, "Incorrect syntax for `defined'"); +			tp++; +			continue; +		} +		if (np->flag&ISMAC) +			builtin(trp, np->val); +		else { +			expand(trp, np); +		} +		tp = trp->tp; +	} +	if (flag) +		unsetsource(); +} + +/* + * Expand the macro whose name is np, at token trp->tp, in the tokenrow. + * Return trp->tp at the first token next to be expanded + * (ordinarily the beginning of the expansion) + */ +void +expand(Tokenrow *trp, Nlist *np) +{ +	Tokenrow ntr; +	int ntokc, narg, i; +	Token *tp; +	Tokenrow *atr[NARG+1]; +	int hs; + +	copytokenrow(&ntr, np->vp);		/* copy macro value */ +	if (np->ap==NULL)			/* parameterless */ +		ntokc = 1; +	else { +		ntokc = gatherargs(trp, atr, &narg); +		if (narg<0) {			/* not actually a call (no '(') */ +			trp->tp++; +			return; +		} +		if (narg != rowlen(np->ap)) { +			error(ERROR, "Disagreement in number of macro arguments"); +			trp->tp->hideset = newhideset(trp->tp->hideset, np); +			trp->tp += ntokc; +			return; +		} +		substargs(np, &ntr, atr);	/* put args into replacement */ +		for (i=0; i<narg; i++) { +			dofree(atr[i]->bp); +			dofree(atr[i]); +		} +	} +	doconcat(&ntr);				/* execute ## operators */ +	hs = newhideset(trp->tp->hideset, np); +	for (tp=ntr.bp; tp<ntr.lp; tp++) {	/* distribute hidesets */ +		if (tp->type==NAME) { +			if (tp->hideset==0) +				
tp->hideset = hs; +			else +				tp->hideset = unionhideset(tp->hideset, hs); +		} +	} +	ntr.tp = ntr.bp; +	insertrow(trp, ntokc, &ntr); +	trp->tp -= rowlen(&ntr); +	dofree(ntr.bp); +}	 + +/* + * Gather an arglist, starting in trp with tp pointing at the macro name. + * Return total number of tokens passed, stash number of args found. + * trp->tp is not changed relative to the tokenrow. + */ +int +gatherargs(Tokenrow *trp, Tokenrow **atr, int *narg) +{ +	int parens = 1; +	int ntok = 0; +	Token *bp, *lp; +	Tokenrow ttr; +	int ntokp; +	int needspace; + +	*narg = -1;			/* means that there is no macro call */ +	/* look for the ( */ +	for (;;) { +		trp->tp++; +		ntok++; +		if (trp->tp >= trp->lp) { +			gettokens(trp, 0); +			if ((trp->lp-1)->type==END) { +				trp->lp -= 1; +				trp->tp -= ntok; +				return ntok; +			} +		} +		if (trp->tp->type==LP) +			break; +		if (trp->tp->type!=NL) +			return ntok; +	} +	*narg = 0; +	ntok++; +	ntokp = ntok; +	trp->tp++; +	/* search for the terminating ), possibly extending the row */ +	needspace = 0; +	while (parens>0) { +		if (trp->tp >= trp->lp) +			gettokens(trp, 0); +		if (needspace) { +			needspace = 0; +			makespace(trp); +		} +		if (trp->tp->type==END) { +			trp->lp -= 1; +			trp->tp -= ntok; +			error(ERROR, "EOF in macro arglist"); +			return ntok; +		} +		if (trp->tp->type==NL) { +			trp->tp += 1; +			adjustrow(trp, -1); +			trp->tp -= 1; +			makespace(trp); +			needspace = 1; +			continue; +		} +		if (trp->tp->type==LP) +			parens++; +		else if (trp->tp->type==RP) +			parens--; +		trp->tp++; +		ntok++; +	} +	trp->tp -= ntok; +	/* Now trp->tp won't move underneath us */ +	lp = bp = trp->tp+ntokp; +	for (; parens>=0; lp++) { +		if (lp->type == LP) { +			parens++; +			continue; +		} +		if (lp->type==RP) +			parens--; +		if (lp->type==DSHARP) +			lp->type = DSHARP1;	/* ## not special in arg */ +		if ((lp->type==COMMA && parens==0) || (parens<0 && (lp-1)->type!=LP)) { +			if (*narg>=NARG-1) +				error(FATAL, "Sorry, too many 
macro arguments"); +			ttr.bp = ttr.tp = bp; +			ttr.lp = lp; +			atr[(*narg)++] = normtokenrow(&ttr); +			bp = lp+1; +		} +	} +	return ntok; +} + +/* + * substitute the argument list into the replacement string + *  This would be simple except for ## and # + */ +void +substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr) +{ +	Tokenrow tatr; +	Token *tp; +	int ntok, argno; + +	for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) { +		if (rtr->tp->type==SHARP) {	/* string operator */ +			tp = rtr->tp; +			rtr->tp += 1; +			if ((argno = lookuparg(np, rtr->tp))<0) { +				error(ERROR, "# not followed by macro parameter"); +				continue; +			} +			ntok = 1 + (rtr->tp - tp); +			rtr->tp = tp; +			insertrow(rtr, ntok, stringify(atr[argno])); +			continue; +		} +		if (rtr->tp->type==NAME +		 && (argno = lookuparg(np, rtr->tp)) >= 0) { +			if ((rtr->tp+1)->type==DSHARP +			 || (rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)) +				insertrow(rtr, 1, atr[argno]); +			else { +				copytokenrow(&tatr, atr[argno]); +				expandrow(&tatr, "<macro>"); +				insertrow(rtr, 1, &tatr); +				dofree(tatr.bp); +			} +			continue; +		} +		rtr->tp++; +	} +} + +/* + * Evaluate the ## operators in a tokenrow + */ +void +doconcat(Tokenrow *trp) +{ +	Token *ltp, *ntp; +	Tokenrow ntr; +	int len; + +	for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) { +		if (trp->tp->type==DSHARP1) +			trp->tp->type = DSHARP; +		else if (trp->tp->type==DSHARP) { +			char tt[128]; +			ltp = trp->tp-1; +			ntp = trp->tp+1; +			if (ltp<trp->bp || ntp>=trp->lp) { +				error(ERROR, "## occurs at border of replacement"); +				continue; +			} +			len = ltp->len + ntp->len; +			strncpy((char*)tt, (char*)ltp->t, ltp->len); +			strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len); +			tt[len] = '\0'; +			setsource("<##>", -1, tt); +			maketokenrow(3, &ntr); +			gettokens(&ntr, 1); +			unsetsource(); +			if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS) +				error(WARNING, "Bad token %r produced by ##", &ntr); +			ntr.lp = ntr.bp+1; +			
trp->tp = ltp; +			makespace(&ntr); +			insertrow(trp, (ntp-ltp)+1, &ntr); +			dofree(ntr.bp); +			trp->tp--; +		} +	} +} + +/* + * tp is a potential parameter name of macro mac; + * look it up in mac's arglist, and if found, return the + * corresponding index in the argname array.  Return -1 if not found. + */ +int +lookuparg(Nlist *mac, Token *tp) +{ +	Token *ap; + +	if (tp->type!=NAME || mac->ap==NULL) +		return -1; +	for (ap=mac->ap->bp; ap<mac->ap->lp; ap++) { +		if (ap->len==tp->len && strncmp((char*)ap->t,(char*)tp->t,ap->len)==0) +			return ap - mac->ap->bp; +	} +	return -1; +} + +/* + * Return a quoted version of the tokenrow (from # arg) + */ +#define	STRLEN	512 +Tokenrow * +stringify(Tokenrow *vp) +{ +	static Token t = { STRING }; +	static Tokenrow tr = { &t, &t, &t+1, 1 }; +	Token *tp; +	uchar s[STRLEN]; +	uchar *sp = s, *cp; +	int i, instring; + +	*sp++ = '"'; +	for (tp = vp->bp; tp < vp->lp; tp++) { +		instring = tp->type==STRING || tp->type==CCON; +		if (sp+2*tp->len >= &s[STRLEN-10]) { +			error(ERROR, "Stringified macro arg is too long"); +			break; +		} +		if (tp->wslen && (tp->flag&XPWS)==0) +			*sp++ = ' '; +		for (i=0, cp=tp->t; i<tp->len; i++) {	 +			if (instring && (*cp=='"' || *cp=='\\')) +				*sp++ = '\\'; +			*sp++ = *cp++; +		} +	} +	*sp++ = '"'; +	*sp = '\0'; +	sp = s; +	t.len = strlen((char*)sp); +	t.t = newstring(sp, t.len, 0); +	return &tr; +} + +/* + * expand a builtin name + */ +void +builtin(Tokenrow *trp, int biname) +{ +	char *op; +	Token *tp; +	Source *s; + +	tp = trp->tp; +	trp->tp++; +	/* need to find the real source */ +	s = cursource; +	while (s && s->fd==-1) +		s = s->next; +	if (s==NULL) +		s = cursource; +	/* most are strings */ +	tp->type = STRING; +	if (tp->wslen) { +		*outbufp++ = ' '; +		tp->wslen = 1; +	} +	op = outbufp; +	*op++ = '"'; +	switch (biname) { + +	case KLINENO: +		tp->type = NUMBER; +		op = outnum(op-1, s->line); +		break; + +	case KFILE: { +		char *src = s->filename; +		while ((*op++ = *src++) != 0) +			
/*
 * Names pre-installed in the name table by setup_kwtab():
 *   kw   - spelling of the name
 *   val  - K* code copied into the entry's val field
 *   flag - IS* bits copied into the entry's flag field
 * The list ends with an entry whose kw is NULL.
 */
struct	kwtab {
	char	*kw;
	int	val;
	int	flag;
} kwtab[] = {
	/* directive names */
	{"if",		KIF,		ISKW},
	{"ifdef",	KIFDEF,		ISKW},
	{"ifndef",	KIFNDEF,	ISKW},
	{"elif",		KELIF,		ISKW},
	{"else",		KELSE,		ISKW},
	{"endif",	KENDIF,		ISKW},
	{"include",	KINCLUDE,	ISKW},
	{"define",	KDEFINE,	ISKW},
	{"undef",	KUNDEF,		ISKW},
	{"line",		KLINE,		ISKW},
	{"warning",	KWARNING,	ISKW},
	{"error",	KERROR,		ISKW},
	{"pragma",	KPRAGMA,	ISKW},
	{"eval",		KEVAL,		ISKW},
	/* ISUNCHANGE names: dodefine() refuses to redefine these */
	{"defined",	KDEFINED,	ISDEFINED+ISUNCHANGE},
	/* ISMAC names are expanded by builtin() according to val */
	{"__LINE__",	KLINENO,	ISMAC+ISUNCHANGE},
	{"__FILE__",	KFILE,		ISMAC+ISUNCHANGE},
	{"__DATE__",	KDATE,		ISMAC+ISUNCHANGE},
	{"__TIME__",	KTIME,		ISMAC+ISUNCHANGE},
	{"__STDC__",	KSTDC,		ISUNCHANGE},
	{NULL}
};
	if (np->val == KDEFINED) { +			kwdefined = np; +			np->val = NAME; +			np->vp = &deftr; +			np->ap = 0; +		} +	} +} + +Nlist * +lookup(Token *tp, int install) +{ +	unsigned int h; +	Nlist *np; +	uchar *cp, *cpe; + +	h = 0; +	for (cp=tp->t, cpe=cp+tp->len; cp<cpe; ) +		h += *cp++; +	h %= NLSIZE; +	np = nlist[h]; +	while (np) { +		if (*tp->t==*np->name && tp->len==np->len  +		 && strncmp((char*)tp->t, (char*)np->name, tp->len)==0) +			return np; +		np = np->next; +	} +	if (install) { +		np = new(Nlist); +		np->vp = NULL; +		np->ap = NULL; +		np->flag = 0; +		np->val = 0; +		np->len = tp->len; +		np->name = newstring(tp->t, tp->len, 0); +		np->next = nlist[h]; +		nlist[h] = np; +		quickset(tp->t[0], tp->len>1? tp->t[1]:0); +		return np; +	} +	return NULL; +} diff --git a/src/tools/lcc/cpp/tokens.c b/src/tools/lcc/cpp/tokens.c new file mode 100644 index 0000000..3570896 --- /dev/null +++ b/src/tools/lcc/cpp/tokens.c @@ -0,0 +1,370 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "cpp.h" + +static char wbuf[2*OBS]; +static char *wbp = wbuf; + +/* + * 1 for tokens that don't need whitespace when they get inserted + * by macro expansion + */ +static const char wstab[] = { +	0,	/* END */ +	0,	/* UNCLASS */ +	0,	/* NAME */ +	0,	/* NUMBER */ +	0,	/* STRING */ +	0,	/* CCON */ +	1,	/* NL */ +	0,	/* WS */ +	0,	/* DSHARP */ +	0,	/* EQ */ +	0,	/* NEQ */ +	0,	/* LEQ */ +	0,	/* GEQ */ +	0,	/* LSH */ +	0,	/* RSH */ +	0,	/* LAND */ +	0,	/* LOR */ +	0,	/* PPLUS */ +	0,	/* MMINUS */ +	0,	/* ARROW */ +	1,	/* SBRA */ +	1,	/* SKET */ +	1,	/* LP */ +	1,	/* RP */ +	0,	/* DOT */ +	0,	/* AND */ +	0,	/* STAR */ +	0,	/* PLUS */ +	0,	/* MINUS */ +	0,	/* TILDE */ +	0,	/* NOT */ +	0,	/* SLASH */ +	0,	/* PCT */ +	0,	/* LT */ +	0,	/* GT */ +	0,	/* CIRC */ +	0,	/* OR */ +	0,	/* QUEST */ +	0,	/* COLON */ +	0,	/* ASGN */ +	1,	/* COMMA */ +	0,	/* SHARP */ +	1,	/* SEMIC */ +	1,	/* CBRA */ +	1,	/* CKET */ +	0,	/* ASPLUS */ + 	0,	/* ASMINUS */ + 	0,	/* ASSTAR */ + 	0,	/* ASSLASH 
*/ + 	0,	/* ASPCT */ + 	0,	/* ASCIRC */ + 	0,	/* ASLSH */ +	0,	/* ASRSH */ + 	0,	/* ASOR */ + 	0,	/* ASAND */ +	0,	/* ELLIPS */ +	0,	/* DSHARP1 */ +	0,	/* NAME1 */ +	0,	/* DEFINED */ +	0,	/* UMINUS */ +}; + +void +maketokenrow(int size, Tokenrow *trp) +{ +	trp->max = size; +	if (size>0) +		trp->bp = (Token *)domalloc(size*sizeof(Token)); +	else +		trp->bp = NULL; +	trp->tp = trp->bp; +	trp->lp = trp->bp; +} + +Token * +growtokenrow(Tokenrow *trp) +{ +	int ncur = trp->tp - trp->bp; +	int nlast = trp->lp - trp->bp; + +	trp->max = 3*trp->max/2 + 1; +	trp->bp = (Token *)realloc(trp->bp, trp->max*sizeof(Token)); +	if (trp->bp == NULL) +		error(FATAL, "Out of memory from realloc"); +	trp->lp = &trp->bp[nlast]; +	trp->tp = &trp->bp[ncur]; +	return trp->lp; +} + +/* + * Compare a row of tokens, ignoring the content of WS; return !=0 if different + */ +int +comparetokens(Tokenrow *tr1, Tokenrow *tr2) +{ +	Token *tp1, *tp2; + +	tp1 = tr1->tp; +	tp2 = tr2->tp; +	if (tr1->lp-tp1 != tr2->lp-tp2) +		return 1; +	for (; tp1<tr1->lp ; tp1++, tp2++) { +		if (tp1->type != tp2->type +		 || (tp1->wslen==0) != (tp2->wslen==0) +		 || tp1->len != tp2->len +		 || strncmp((char*)tp1->t, (char*)tp2->t, tp1->len)!=0) +			return 1; +	} +	return 0; +} + +/* + * replace ntok tokens starting at dtr->tp with the contents of str. + * tp ends up pointing just beyond the replacement. + * Canonical whitespace is assured on each side. 
+ */ +void +insertrow(Tokenrow *dtr, int ntok, Tokenrow *str) +{ +	int nrtok = rowlen(str); + +	dtr->tp += ntok; +	adjustrow(dtr, nrtok-ntok); +	dtr->tp -= ntok; +	movetokenrow(dtr, str); +	makespace(dtr); +	dtr->tp += nrtok; +	makespace(dtr); +} + +/* + * make sure there is WS before trp->tp, if tokens might merge in the output + */ +void +makespace(Tokenrow *trp) +{ +	uchar *tt; +	Token *tp = trp->tp; + +	if (tp >= trp->lp) +		return; +	if (tp->wslen) { +		if (tp->flag&XPWS +		 && (wstab[tp->type] || (trp->tp>trp->bp && wstab[(tp-1)->type]))) { +			tp->wslen = 0; +			return; +		} +		tp->t[-1] = ' '; +		return; +	} +	if (wstab[tp->type] || (trp->tp>trp->bp && wstab[(tp-1)->type])) +		return; +	tt = newstring(tp->t, tp->len, 1); +	*tt++ = ' '; +	tp->t = tt; +	tp->wslen = 1; +	tp->flag |= XPWS; +} + +/* + * Copy an entire tokenrow into another, at tp. + * It is assumed that there is enough space. + *  Not strictly conforming. + */ +void +movetokenrow(Tokenrow *dtr, Tokenrow *str) +{ +	int nby; + +	/* nby = sizeof(Token) * (str->lp - str->bp); */ +	nby = (char *)str->lp - (char *)str->bp; +	memmove(dtr->tp, str->bp, nby); +} + +/* + * Move the tokens in a row, starting at tr->tp, rightward by nt tokens; + * nt may be negative (left move). + * The row may need to be grown. + * Non-strictly conforming because of the (char *), but easily fixed + */ +void +adjustrow(Tokenrow *trp, int nt) +{ +	int nby, size; + +	if (nt==0) +		return; +	size = (trp->lp - trp->bp) + nt; +	while (size > trp->max) +		growtokenrow(trp); +	/* nby = sizeof(Token) * (trp->lp - trp->tp); */ +	nby = (char *)trp->lp - (char *)trp->tp; +	if (nby) +		memmove(trp->tp+nt, trp->tp, nby); +	trp->lp += nt; +} + +/* + * Copy a row of tokens into the destination holder, allocating + * the space for the contents.  Return the destination. 
+ */ +Tokenrow * +copytokenrow(Tokenrow *dtr, Tokenrow *str) +{ +	int len = rowlen(str); + +	maketokenrow(len, dtr); +	movetokenrow(dtr, str); +	dtr->lp += len; +	return dtr; +} + +/* + * Produce a copy of a row of tokens.  Start at trp->tp. + * The value strings are copied as well.  The first token + * has WS available. + */ +Tokenrow * +normtokenrow(Tokenrow *trp) +{ +	Token *tp; +	Tokenrow *ntrp = new(Tokenrow); +	int len; + +	len = trp->lp - trp->tp; +	if (len<=0) +		len = 1; +	maketokenrow(len, ntrp); +	for (tp=trp->tp; tp < trp->lp; tp++) { +		*ntrp->lp = *tp; +		if (tp->len) { +			ntrp->lp->t = newstring(tp->t, tp->len, 1); +			*ntrp->lp->t++ = ' '; +			if (tp->wslen) +				ntrp->lp->wslen = 1; +		} +		ntrp->lp++; +	} +	if (ntrp->lp > ntrp->bp) +		ntrp->bp->wslen = 0; +	return ntrp; +} + +/* + * Debugging + */ +void +peektokens(Tokenrow *trp, char *str) +{ +	Token *tp; + +	tp = trp->tp; +	flushout(); +	if (str) +		fprintf(stderr, "%s ", str); +	if (tp<trp->bp || tp>trp->lp) +		fprintf(stderr, "(tp offset %ld) ", (long int) (tp - trp->bp)); +	for (tp=trp->bp; tp<trp->lp && tp<trp->bp+32; tp++) { +		if (tp->type!=NL) { +			int c = tp->t[tp->len]; +			tp->t[tp->len] = 0; +			fprintf(stderr, "%s", tp->t); +			tp->t[tp->len] = c; +		} +		if (tp->type==NAME) { +			fprintf(stderr, tp==trp->tp?"{*":"{"); +			prhideset(tp->hideset); +			fprintf(stderr, "} "); +		} else +			fprintf(stderr, tp==trp->tp?"{%x*} ":"{%x} ", tp->type); +	} +	fprintf(stderr, "\n"); +	fflush(stderr); +} + +void +puttokens(Tokenrow *trp) +{ +	Token *tp; +	int len; +	uchar *p; + +	if (verbose) +		peektokens(trp, ""); +	tp = trp->bp; +	for (; tp<trp->lp; tp++) { +		len = tp->len+tp->wslen; +		p = tp->t-tp->wslen; +		while (tp<trp->lp-1 && p+len == (tp+1)->t - (tp+1)->wslen) { +			tp++; +			len += tp->wslen+tp->len; +		} +		if (len>OBS/2) {		/* handle giant token */ +			if (wbp > wbuf) +				write(1, wbuf, wbp-wbuf); +			write(1, (char *)p, len); +			wbp = wbuf; +		} else {	 +			memcpy(wbp, p, 
len); +			wbp += len; +		} +		if (wbp >= &wbuf[OBS]) { +			write(1, wbuf, OBS); +			if (wbp > &wbuf[OBS]) +				memcpy(wbuf, wbuf+OBS, wbp - &wbuf[OBS]); +			wbp -= OBS; +		} +	} +	trp->tp = tp; +	if (cursource->fd==0) +		flushout(); +} + +void +flushout(void) +{ +	if (wbp>wbuf) { +		write(1, wbuf, wbp-wbuf); +		wbp = wbuf; +	} +} + +/* + * turn a row into just a newline + */ +void +setempty(Tokenrow *trp) +{ +	trp->tp = trp->bp; +	trp->lp = trp->bp+1; +	*trp->bp = nltoken; +} + +/* + * generate a number + */ +char * +outnum(char *p, int n) +{ +	if (n>=10) +		p = outnum(p, n/10); +	*p++ = n%10 + '0'; +	return p; +} + +/* + * allocate and initialize a new string from s, of length l, at offset o + * Null terminated. + */ +uchar * +newstring(uchar *s, int l, int o) +{ +	uchar *ns = (uchar *)domalloc(l+o+1); + +	ns[l+o] = '\0'; +	return (uchar*)strncpy((char*)ns+o, (char*)s, l) - o; +} diff --git a/src/tools/lcc/cpp/unix.c b/src/tools/lcc/cpp/unix.c new file mode 100644 index 0000000..ff1496a --- /dev/null +++ b/src/tools/lcc/cpp/unix.c @@ -0,0 +1,134 @@ +#include <stdio.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include "cpp.h" + +extern	int lcc_getopt(int, char *const *, const char *); +extern	char	*optarg, rcsid[]; +extern	int	optind; +int	verbose; +int	Mflag;	/* only print active include files */ +char	*objname; /* "src.$O: " */ +int	Cplusplus = 1; + +void +setup(int argc, char **argv) +{ +	int c, fd, i; +	char *fp, *dp; +	Tokenrow tr; +	extern void setup_kwtab(void); +	uchar *includeDirs[ NINCLUDE ] = { 0 }; +	int   numIncludeDirs = 0; + +	setup_kwtab(); +	while ((c = lcc_getopt(argc, argv, "MNOVv+I:D:U:F:lg")) != -1) +		switch (c) { +		case 'N': +			for (i=0; i<NINCLUDE; i++) +				if (includelist[i].always==1) +					includelist[i].deleted = 1; +			break; +		case 'I': +			includeDirs[ numIncludeDirs++ ] = newstring( (uchar *)optarg, strlen( optarg ), 0 ); +			break; +		case 'D': +		case 'U': +			setsource("<cmdarg>", 
-1, optarg); +			maketokenrow(3, &tr); +			gettokens(&tr, 1); +			doadefine(&tr, c); +			unsetsource(); +			break; +		case 'M': +			Mflag++; +			break; +		case 'v': +			fprintf(stderr, "%s %s\n", argv[0], rcsid); +			break; +		case 'V': +			verbose++; +			break; +		case '+': +			Cplusplus++; +			break; +		default: +			break; +		} +	dp = "."; +	fp = "<stdin>"; +	fd = 0; +	if (optind<argc) { +		dp = basepath( argv[optind] ); +		fp = (char*)newstring((uchar*)argv[optind], strlen(argv[optind]), 0); +		if ((fd = open(fp, 0)) <= 0) +			error(FATAL, "Can't open input file %s", fp); +	} +	if (optind+1<argc) { +		int fdo; +#ifdef WIN32 +		fdo = creat(argv[optind+1], _S_IREAD | _S_IWRITE); +#else +		fdo = creat(argv[optind+1], 0666); +#endif +		if (fdo<0) +			error(FATAL, "Can't open output file %s", argv[optind+1]); +		dup2(fdo, 1); +	} +	if(Mflag) +		setobjname(fp); +	includelist[NINCLUDE-1].always = 0; +	includelist[NINCLUDE-1].file = dp; + +	for( i = 0; i < numIncludeDirs; i++ ) +		appendDirToIncludeList( (char *)includeDirs[ i ] ); + +	setsource(fp, fd, NULL); +} + + +char *basepath( char *fname ) +{ +	char *dp = "."; +	char *p; +	if ((p = strrchr(fname, '/')) != NULL) { +		int dlen = p - fname; +		dp = (char*)newstring((uchar*)fname, dlen+1, 0); +		dp[dlen] = '\0'; +	} + +	return dp; +} + +/* memmove is defined here because some vendors don't provide it at +   all and others do a terrible job (like calling malloc) */ +// -- ouch, that hurts -- ln +#ifndef __APPLE__   /* always use the system memmove() on Mac OS X. --ryan. */ +#ifdef memmove +#undef memmove +#endif +void * +memmove(void *dp, const void *sp, size_t n) +{ +	unsigned char *cdp, *csp; + +	if (n<=0) +		return dp; +	cdp = dp; +	csp = (unsigned char *)sp; +	if (cdp < csp) { +		do { +			*cdp++ = *csp++; +		} while (--n); +	} else { +		cdp += n; +		csp += n; +		do { +			*--cdp = *--csp; +		} while (--n); +	} +	return dp; +} +#endif  | 
