diff options
author | Paweł Redman <pawel.redman@gmail.com> | 2017-03-22 17:56:34 +0100 |
---|---|---|
committer | Paweł Redman <pawel.redman@gmail.com> | 2017-03-22 17:56:34 +0100 |
commit | 6a777afc079c2a8d3af3ecd2145fe8dd50567a39 (patch) | |
tree | 520f4489cebf8564ef6cb27064ceea45cbc005b3 /src/tools/lcc/cpp |
Diffstat (limited to 'src/tools/lcc/cpp')
-rw-r--r-- | src/tools/lcc/cpp/cpp.c | 322 | ||||
-rw-r--r-- | src/tools/lcc/cpp/cpp.h | 166 | ||||
-rw-r--r-- | src/tools/lcc/cpp/eval.c | 524 | ||||
-rw-r--r-- | src/tools/lcc/cpp/getopt.c | 53 | ||||
-rw-r--r-- | src/tools/lcc/cpp/hideset.c | 112 | ||||
-rw-r--r-- | src/tools/lcc/cpp/include.c | 154 | ||||
-rw-r--r-- | src/tools/lcc/cpp/lex.c | 580 | ||||
-rw-r--r-- | src/tools/lcc/cpp/macro.c | 515 | ||||
-rw-r--r-- | src/tools/lcc/cpp/nlist.c | 103 | ||||
-rw-r--r-- | src/tools/lcc/cpp/tokens.c | 370 | ||||
-rw-r--r-- | src/tools/lcc/cpp/unix.c | 128 |
11 files changed, 3027 insertions, 0 deletions
diff --git a/src/tools/lcc/cpp/cpp.c b/src/tools/lcc/cpp/cpp.c new file mode 100644 index 0000000..6739e4d --- /dev/null +++ b/src/tools/lcc/cpp/cpp.c @@ -0,0 +1,322 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <stdarg.h> +#include "cpp.h" + +char rcsid[] = "cpp.c - faked rcsid"; + +#define OUTS 16384 +char outbuf[OUTS]; +char *outp = outbuf; +Source *cursource; +int nerrs; +struct token nltoken = { NL, 0, 0, 0, 1, (uchar*)"\n" }; +char *curtime; +int incdepth; +int ifdepth; +int ifsatisfied[NIF]; +int skipping; + + +int +main(int argc, char **argv) +{ + Tokenrow tr; + time_t t; + char ebuf[BUFSIZ]; + + setbuf(stderr, ebuf); + t = time(NULL); + curtime = ctime(&t); + maketokenrow(3, &tr); + expandlex(); + setup(argc, argv); + fixlex(); + iniths(); + genline(); + process(&tr); + flushout(); + fflush(stderr); + exit(nerrs > 0); + return 0; +} + +void +process(Tokenrow *trp) +{ + int anymacros = 0; + + for (;;) { + if (trp->tp >= trp->lp) { + trp->tp = trp->lp = trp->bp; + outp = outbuf; + anymacros |= gettokens(trp, 1); + trp->tp = trp->bp; + } + if (trp->tp->type == END) { + if (--incdepth>=0) { + if (cursource->ifdepth) + error(ERROR, + "Unterminated conditional in #include"); + unsetsource(); + cursource->line += cursource->lineinc; + trp->tp = trp->lp; + genline(); + continue; + } + if (ifdepth) + error(ERROR, "Unterminated #if/#ifdef/#ifndef"); + break; + } + if (trp->tp->type==SHARP) { + trp->tp += 1; + control(trp); + } else if (!skipping && anymacros) + expandrow(trp, NULL); + if (skipping) + setempty(trp); + puttokens(trp); + anymacros = 0; + cursource->line += cursource->lineinc; + if (cursource->lineinc>1) { + genline(); + } + } +} + +void +control(Tokenrow *trp) +{ + Nlist *np; + Token *tp; + + tp = trp->tp; + if (tp->type!=NAME) { + if (tp->type==NUMBER) + goto kline; + if (tp->type != NL) + error(ERROR, "Unidentifiable control line"); + return; /* else empty line */ + } + if ((np = lookup(tp, 0))==NULL || ((np->flag&ISKW)==0 && !skipping)) { + error(WARNING, "Unknown preprocessor control %t", tp); + return; + } + if (skipping) { + switch (np->val) { + case KENDIF: + if (--ifdepth<skipping) + skipping = 0; + --cursource->ifdepth; + setempty(trp); + return; + + case KIFDEF: + case KIFNDEF: + case KIF: + if (++ifdepth >= NIF) + error(FATAL, "#if too deeply nested"); + ++cursource->ifdepth; + return; + + case KELIF: + case KELSE: + if (ifdepth<=skipping) + break; + return; + + default: + return; + } + } + switch (np->val) { + case KDEFINE: + dodefine(trp); + break; + + case KUNDEF: + tp += 1; + if (tp->type!=NAME || trp->lp - trp->bp != 4) { + error(ERROR, "Syntax error in #undef"); + break; + } + if ((np = lookup(tp, 0)) != NULL) + np->flag &= ~ISDEFINED; + break; + + case KPRAGMA: + return; + + case KIFDEF: + case KIFNDEF: + case KIF: + if (++ifdepth >= NIF) + error(FATAL, "#if too deeply nested"); + ++cursource->ifdepth; + ifsatisfied[ifdepth] = 0; + if (eval(trp, np->val)) + ifsatisfied[ifdepth] = 1; + else + skipping = ifdepth; + break; + + case KELIF: + if (ifdepth==0) { + error(ERROR, "#elif with no #if"); + return; + } + if (ifsatisfied[ifdepth]==2) + error(ERROR, "#elif after #else"); + if (eval(trp, np->val)) { + if (ifsatisfied[ifdepth]) + skipping = ifdepth; + else { + skipping = 0; + ifsatisfied[ifdepth] = 1; + } + } else + skipping = ifdepth; + break; + + case KELSE: + if (ifdepth==0 || cursource->ifdepth==0) { + error(ERROR, "#else with no #if"); + return; + } + if (ifsatisfied[ifdepth]==2) + error(ERROR, "#else after #else"); + if (trp->lp - trp->bp != 3) + error(ERROR, "Syntax error in #else"); + skipping = ifsatisfied[ifdepth]? ifdepth: 0; + ifsatisfied[ifdepth] = 2; + break; + + case KENDIF: + if (ifdepth==0 || cursource->ifdepth==0) { + error(ERROR, "#endif with no #if"); + return; + } + --ifdepth; + --cursource->ifdepth; + if (trp->lp - trp->bp != 3) + error(WARNING, "Syntax error in #endif"); + break; + + case KERROR: + trp->tp = tp+1; + error(WARNING, "#error directive: %r", trp); + break; + + case KLINE: + trp->tp = tp+1; + expandrow(trp, "<line>"); + tp = trp->bp+2; + kline: + if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp + || ((tp+3==trp->lp && ((tp+1)->type!=STRING))||*(tp+1)->t=='L')){ + error(ERROR, "Syntax error in #line"); + return; + } + cursource->line = atol((char*)tp->t)-1; + if (cursource->line<0 || cursource->line>=32768) + error(WARNING, "#line specifies number out of range"); + tp = tp+1; + if (tp+1<trp->lp) + cursource->filename=(char*)newstring(tp->t+1,tp->len-2,0); + return; + + case KDEFINED: + error(ERROR, "Bad syntax for control line"); + break; + + case KINCLUDE: + doinclude(trp); + trp->lp = trp->bp; + return; + + case KEVAL: + eval(trp, np->val); + break; + + default: + error(ERROR, "Preprocessor control `%t' not yet implemented", tp); + break; + } + setempty(trp); + return; +} + +void * +domalloc(int size) +{ + void *p = malloc(size); + + if (p==NULL) + error(FATAL, "Out of memory from malloc"); + return p; +} + +void +dofree(void *p) +{ + free(p); +} + +void +error(enum errtype type, char *string, ...) +{ + va_list ap; + char *cp, *ep; + Token *tp; + Tokenrow *trp; + Source *s; + int i; + + fprintf(stderr, "cpp: "); + for (s=cursource; s; s=s->next) + if (*s->filename) + fprintf(stderr, "%s:%d ", s->filename, s->line); + va_start(ap, string); + for (ep=string; *ep; ep++) { + if (*ep=='%') { + switch (*++ep) { + + case 's': + cp = va_arg(ap, char *); + fprintf(stderr, "%s", cp); + break; + case 'd': + i = va_arg(ap, int); + fprintf(stderr, "%d", i); + break; + case 't': + tp = va_arg(ap, Token *); + fprintf(stderr, "%.*s", tp->len, tp->t); + break; + + case 'r': + trp = va_arg(ap, Tokenrow *); + for (tp=trp->tp; tp<trp->lp&&tp->type!=NL; tp++) { + if (tp>trp->tp && tp->wslen) + fputc(' ', stderr); + fprintf(stderr, "%.*s", tp->len, tp->t); + } + break; + + default: + fputc(*ep, stderr); + break; + } + } else + fputc(*ep, stderr); + } + va_end(ap); + fputc('\n', stderr); + if (type==FATAL) + exit(1); + if (type!=WARNING) + nerrs = 1; + fflush(stderr); +} diff --git a/src/tools/lcc/cpp/cpp.h b/src/tools/lcc/cpp/cpp.h new file mode 100644 index 0000000..ae855c9 --- /dev/null +++ b/src/tools/lcc/cpp/cpp.h @@ -0,0 +1,166 @@ +#define INS 32768 /* input buffer */ +#define OBS 4096 /* outbut buffer */ +#define NARG 32 /* Max number arguments to a macro */ +#define NINCLUDE 32 /* Max number of include directories (-I) */ +#define NIF 32 /* depth of nesting of #if */ +#ifndef EOF +#define EOF (-1) +#endif +#ifndef NULL +#define NULL 0 +#endif + +#ifndef __alpha +typedef unsigned char uchar; +#endif + +enum toktype { END, UNCLASS, NAME, NUMBER, STRING, CCON, NL, WS, DSHARP, + EQ, NEQ, LEQ, GEQ, LSH, RSH, LAND, LOR, PPLUS, MMINUS, + ARROW, SBRA, SKET, LP, RP, DOT, AND, STAR, PLUS, MINUS, + TILDE, NOT, SLASH, PCT, LT, GT, CIRC, OR, QUEST, + COLON, ASGN, COMMA, SHARP, SEMIC, CBRA, CKET, + ASPLUS, ASMINUS, ASSTAR, ASSLASH, ASPCT, ASCIRC, ASLSH, + ASRSH, ASOR, ASAND, ELLIPS, + DSHARP1, NAME1, DEFINED, UMINUS }; + +enum kwtype { KIF, KIFDEF, KIFNDEF, KELIF, KELSE, KENDIF, KINCLUDE, KDEFINE, + KUNDEF, KLINE, KERROR, KPRAGMA, KDEFINED, + KLINENO, KFILE, KDATE, KTIME, KSTDC, KEVAL }; + +#define ISDEFINED 01 /* has #defined value */ +#define ISKW 02 /* is PP keyword */ +#define ISUNCHANGE 04 /* can't be #defined in PP */ +#define ISMAC 010 /* builtin macro, e.g. __LINE__ */ + +#define EOB 0xFE /* sentinel for end of input buffer */ +#define EOFC 0xFD /* sentinel for end of input file */ +#define XPWS 1 /* token flag: white space to assure token sep. */ + +typedef struct token { + unsigned char type; + unsigned char flag; + unsigned short hideset; + unsigned int wslen; + unsigned int len; + uchar *t; +} Token; + +typedef struct tokenrow { + Token *tp; /* current one to scan */ + Token *bp; /* base (allocated value) */ + Token *lp; /* last+1 token used */ + int max; /* number allocated */ +} Tokenrow; + +typedef struct source { + char *filename; /* name of file of the source */ + int line; /* current line number */ + int lineinc; /* adjustment for \\n lines */ + uchar *inb; /* input buffer */ + uchar *inp; /* input pointer */ + uchar *inl; /* end of input */ + int fd; /* input source */ + int ifdepth; /* conditional nesting in include */ + struct source *next; /* stack for #include */ +} Source; + +typedef struct nlist { + struct nlist *next; + uchar *name; + int len; + Tokenrow *vp; /* value as macro */ + Tokenrow *ap; /* list of argument names, if any */ + char val; /* value as preprocessor name */ + char flag; /* is defined, is pp name */ +} Nlist; + +typedef struct includelist { + char deleted; + char always; + char *file; +} Includelist; + +#define new(t) (t *)domalloc(sizeof(t)) +#define quicklook(a,b) (namebit[(a)&077] & (1<<((b)&037))) +#define quickset(a,b) namebit[(a)&077] |= (1<<((b)&037)) +extern unsigned long namebit[077+1]; + +enum errtype { WARNING, ERROR, FATAL }; + +void expandlex(void); +void fixlex(void); +void setup(int, char **); +int gettokens(Tokenrow *, int); +int comparetokens(Tokenrow *, Tokenrow *); +Source *setsource(char *, int, char *); +void unsetsource(void); +void puttokens(Tokenrow *); +void process(Tokenrow *); +void *domalloc(int); +void dofree(void *); +void error(enum errtype, char *, ...); +void flushout(void); +int fillbuf(Source *); +int trigraph(Source *); +int foldline(Source *); +Nlist *lookup(Token *, int); +void control(Tokenrow *); +void dodefine(Tokenrow *); +void doadefine(Tokenrow *, int); +void doinclude(Tokenrow *); +void appendDirToIncludeList( char *dir ); +void doif(Tokenrow *, enum kwtype); +void expand(Tokenrow *, Nlist *); +void builtin(Tokenrow *, int); +int gatherargs(Tokenrow *, Tokenrow **, int *); +void substargs(Nlist *, Tokenrow *, Tokenrow **); +void expandrow(Tokenrow *, char *); +void maketokenrow(int, Tokenrow *); +Tokenrow *copytokenrow(Tokenrow *, Tokenrow *); +Token *growtokenrow(Tokenrow *); +Tokenrow *normtokenrow(Tokenrow *); +void adjustrow(Tokenrow *, int); +void movetokenrow(Tokenrow *, Tokenrow *); +void insertrow(Tokenrow *, int, Tokenrow *); +void peektokens(Tokenrow *, char *); +void doconcat(Tokenrow *); +Tokenrow *stringify(Tokenrow *); +int lookuparg(Nlist *, Token *); +long eval(Tokenrow *, int); +void genline(void); +void setempty(Tokenrow *); +void makespace(Tokenrow *); +char *outnum(char *, int); +int digit(int); +uchar *newstring(uchar *, int, int); +int checkhideset(int, Nlist *); +void prhideset(int); +int newhideset(int, Nlist *); +int unionhideset(int, int); +void iniths(void); +void setobjname(char *); +#define rowlen(tokrow) ((tokrow)->lp - (tokrow)->bp) + +char *basepath( char *fname ); + +extern char *outp; +extern Token nltoken; +extern Source *cursource; +extern char *curtime; +extern int incdepth; +extern int ifdepth; +extern int ifsatisfied[NIF]; +extern int Mflag; +extern int skipping; +extern int verbose; +extern int Cplusplus; +extern Nlist *kwdefined; +extern Includelist includelist[NINCLUDE]; +extern char wd[]; + +#ifndef _WIN32 +#include <unistd.h> +#else +#include <io.h> +#endif +#include <fcntl.h> diff --git a/src/tools/lcc/cpp/eval.c b/src/tools/lcc/cpp/eval.c new file mode 100644 index 0000000..95a9e11 --- /dev/null +++ b/src/tools/lcc/cpp/eval.c @@ -0,0 +1,524 @@ +#include <stdlib.h> +#include <string.h> +#include "cpp.h" + +#define NSTAK 32 +#define SGN 0 +#define UNS 1 +#define UND 2 + +#define UNSMARK 0x1000 + +struct value { + long val; + int type; +}; + +/* conversion types */ +#define RELAT 1 +#define ARITH 2 +#define LOGIC 3 +#define SPCL 4 +#define SHIFT 5 +#define UNARY 6 + +/* operator priority, arity, and conversion type, indexed by tokentype */ +struct pri { + char pri; + char arity; + char ctype; +} priority[] = { + { 0, 0, 0 }, /* END */ + { 0, 0, 0 }, /* UNCLASS */ + { 0, 0, 0 }, /* NAME */ + { 0, 0, 0 }, /* NUMBER */ + { 0, 0, 0 }, /* STRING */ + { 0, 0, 0 }, /* CCON */ + { 0, 0, 0 }, /* NL */ + { 0, 0, 0 }, /* WS */ + { 0, 0, 0 }, /* DSHARP */ + { 11, 2, RELAT }, /* EQ */ + { 11, 2, RELAT }, /* NEQ */ + { 12, 2, RELAT }, /* LEQ */ + { 12, 2, RELAT }, /* GEQ */ + { 13, 2, SHIFT }, /* LSH */ + { 13, 2, SHIFT }, /* RSH */ + { 7, 2, LOGIC }, /* LAND */ + { 6, 2, LOGIC }, /* LOR */ + { 0, 0, 0 }, /* PPLUS */ + { 0, 0, 0 }, /* MMINUS */ + { 0, 0, 0 }, /* ARROW */ + { 0, 0, 0 }, /* SBRA */ + { 0, 0, 0 }, /* SKET */ + { 3, 0, 0 }, /* LP */ + { 3, 0, 0 }, /* RP */ + { 0, 0, 0 }, /* DOT */ + { 10, 2, ARITH }, /* AND */ + { 15, 2, ARITH }, /* STAR */ + { 14, 2, ARITH }, /* PLUS */ + { 14, 2, ARITH }, /* MINUS */ + { 16, 1, UNARY }, /* TILDE */ + { 16, 1, UNARY }, /* NOT */ + { 15, 2, ARITH }, /* SLASH */ + { 15, 2, ARITH }, /* PCT */ + { 12, 2, RELAT }, /* LT */ + { 12, 2, RELAT }, /* GT */ + { 9, 2, ARITH }, /* CIRC */ + { 8, 2, ARITH }, /* OR */ + { 5, 2, SPCL }, /* QUEST */ + { 5, 2, SPCL }, /* COLON */ + { 0, 0, 0 }, /* ASGN */ + { 4, 2, 0 }, /* COMMA */ + { 0, 0, 0 }, /* SHARP */ + { 0, 0, 0 }, /* SEMIC */ + { 0, 0, 0 }, /* CBRA */ + { 0, 0, 0 }, /* CKET */ + { 0, 0, 0 }, /* ASPLUS */ + { 0, 0, 0 }, /* ASMINUS */ + { 0, 0, 0 }, /* ASSTAR */ + { 0, 0, 0 }, /* ASSLASH */ + { 0, 0, 0 }, /* ASPCT */ + { 0, 0, 0 }, /* ASCIRC */ + { 0, 0, 0 }, /* ASLSH */ + { 0, 0, 0 }, /* ASRSH */ + { 0, 0, 0 }, /* ASOR */ + { 0, 0, 0 }, /* ASAND */ + { 0, 0, 0 }, /* ELLIPS */ + { 0, 0, 0 }, /* DSHARP1 */ + { 0, 0, 0 }, /* NAME1 */ + { 16, 1, UNARY }, /* DEFINED */ + { 16, 0, UNARY }, /* UMINUS */ +}; + +int evalop(struct pri); +struct value tokval(Token *); +struct value vals[NSTAK], *vp; +enum toktype ops[NSTAK], *op; + +/* + * Evaluate an #if #elif #ifdef #ifndef line. trp->tp points to the keyword. + */ +long +eval(Tokenrow *trp, int kw) +{ + Token *tp; + Nlist *np; + int ntok, rand; + + trp->tp++; + if (kw==KIFDEF || kw==KIFNDEF) { + if (trp->lp - trp->bp != 4 || trp->tp->type!=NAME) { + error(ERROR, "Syntax error in #ifdef/#ifndef"); + return 0; + } + np = lookup(trp->tp, 0); + return (kw==KIFDEF) == (np && np->flag&(ISDEFINED|ISMAC)); + } + ntok = trp->tp - trp->bp; + kwdefined->val = KDEFINED; /* activate special meaning of defined */ + expandrow(trp, "<if>"); + kwdefined->val = NAME; + vp = vals; + op = ops; + *op++ = END; + for (rand=0, tp = trp->bp+ntok; tp < trp->lp; tp++) { + switch(tp->type) { + case WS: + case NL: + continue; + + /* nilary */ + case NAME: + case NAME1: + case NUMBER: + case CCON: + case STRING: + if (rand) + goto syntax; + *vp++ = tokval(tp); + rand = 1; + continue; + + /* unary */ + case DEFINED: + case TILDE: + case NOT: + if (rand) + goto syntax; + *op++ = tp->type; + continue; + + /* unary-binary */ + case PLUS: case MINUS: case STAR: case AND: + if (rand==0) { + if (tp->type==MINUS) + *op++ = UMINUS; + if (tp->type==STAR || tp->type==AND) { + error(ERROR, "Illegal operator * or & in #if/#elsif"); + return 0; + } + continue; + } + /* flow through */ + + /* plain binary */ + case EQ: case NEQ: case LEQ: case GEQ: case LSH: case RSH: + case LAND: case LOR: case SLASH: case PCT: + case LT: case GT: case CIRC: case OR: case QUEST: + case COLON: case COMMA: + if (rand==0) + goto syntax; + if (evalop(priority[tp->type])!=0) + return 0; + *op++ = tp->type; + rand = 0; + continue; + + case LP: + if (rand) + goto syntax; + *op++ = LP; + continue; + + case RP: + if (!rand) + goto syntax; + if (evalop(priority[RP])!=0) + return 0; + if (op<=ops || op[-1]!=LP) { + goto syntax; + } + op--; + continue; + + default: + error(ERROR,"Bad operator (%t) in #if/#elsif", tp); + return 0; + } + } + if (rand==0) + goto syntax; + if (evalop(priority[END])!=0) + return 0; + if (op!=&ops[1] || vp!=&vals[1]) { + error(ERROR, "Botch in #if/#elsif"); + return 0; + } + if (vals[0].type==UND) + error(ERROR, "Undefined expression value"); + return vals[0].val; +syntax: + error(ERROR, "Syntax error in #if/#elsif"); + return 0; +} + +int +evalop(struct pri pri) +{ + struct value v1, v2; + long rv1, rv2; + int rtype, oper; + + /* prevent compiler whining. */ + v1.val = v2.val = 0; + v1.type = v2.type = 0; + + rv2=0; + rtype=0; + while (pri.pri < priority[op[-1]].pri) { + oper = *--op; + if (priority[oper].arity==2) { + v2 = *--vp; + rv2 = v2.val; + } + v1 = *--vp; + rv1 = v1.val; +/*lint -e574 -e644 */ + switch (priority[oper].ctype) { + case 0: + default: + error(WARNING, "Syntax error in #if/#endif"); + return 1; + case ARITH: + case RELAT: + if (v1.type==UNS || v2.type==UNS) + rtype = UNS; + else + rtype = SGN; + if (v1.type==UND || v2.type==UND) + rtype = UND; + if (priority[oper].ctype==RELAT && rtype==UNS) { + oper |= UNSMARK; + rtype = SGN; + } + break; + case SHIFT: + if (v1.type==UND || v2.type==UND) + rtype = UND; + else + rtype = v1.type; + if (rtype==UNS) + oper |= UNSMARK; + break; + case UNARY: + rtype = v1.type; + break; + case LOGIC: + case SPCL: + break; + } + switch (oper) { + case EQ: case EQ|UNSMARK: + rv1 = rv1==rv2; break; + case NEQ: case NEQ|UNSMARK: + rv1 = rv1!=rv2; break; + case LEQ: + rv1 = rv1<=rv2; break; + case GEQ: + rv1 = rv1>=rv2; break; + case LT: + rv1 = rv1<rv2; break; + case GT: + rv1 = rv1>rv2; break; + case LEQ|UNSMARK: + rv1 = (unsigned long)rv1<=rv2; break; + case GEQ|UNSMARK: + rv1 = (unsigned long)rv1>=rv2; break; + case LT|UNSMARK: + rv1 = (unsigned long)rv1<rv2; break; + case GT|UNSMARK: + rv1 = (unsigned long)rv1>rv2; break; + case LSH: + rv1 <<= rv2; break; + case LSH|UNSMARK: + rv1 = (unsigned long)rv1<<rv2; break; + case RSH: + rv1 >>= rv2; break; + case RSH|UNSMARK: + rv1 = (unsigned long)rv1>>rv2; break; + case LAND: + rtype = UND; + if (v1.type==UND) + break; + if (rv1!=0) { + if (v2.type==UND) + break; + rv1 = rv2!=0; + } else + rv1 = 0; + rtype = SGN; + break; + case LOR: + rtype = UND; + if (v1.type==UND) + break; + if (rv1==0) { + if (v2.type==UND) + break; + rv1 = rv2!=0; + } else + rv1 = 1; + rtype = SGN; + break; + case AND: + rv1 &= rv2; break; + case STAR: + rv1 *= rv2; break; + case PLUS: + rv1 += rv2; break; + case MINUS: + rv1 -= rv2; break; + case UMINUS: + if (v1.type==UND) + rtype = UND; + rv1 = -rv1; break; + case OR: + rv1 |= rv2; break; + case CIRC: + rv1 ^= rv2; break; + case TILDE: + rv1 = ~rv1; break; + case NOT: + rv1 = !rv1; if (rtype!=UND) rtype = SGN; break; + case SLASH: + if (rv2==0) { + rtype = UND; + break; + } + if (rtype==UNS) + rv1 /= (unsigned long)rv2; + else + rv1 /= rv2; + break; + case PCT: + if (rv2==0) { + rtype = UND; + break; + } + if (rtype==UNS) + rv1 %= (unsigned long)rv2; + else + rv1 %= rv2; + break; + case COLON: + if (op[-1] != QUEST) + error(ERROR, "Bad ?: in #if/endif"); + else { + op--; + if ((--vp)->val==0) + v1 = v2; + rtype = v1.type; + rv1 = v1.val; + } + break; + case DEFINED: + break; + default: + error(ERROR, "Eval botch (unknown operator)"); + return 1; + } +/*lint +e574 +e644 */ + v1.val = rv1; + v1.type = rtype; + *vp++ = v1; + } + return 0; +} + +struct value +tokval(Token *tp) +{ + struct value v; + Nlist *np; + int i, base, c; + unsigned long n; + uchar *p; + + v.type = SGN; + v.val = 0; + switch (tp->type) { + + case NAME: + v.val = 0; + break; + + case NAME1: + if ((np = lookup(tp, 0)) != NULL && np->flag&(ISDEFINED|ISMAC)) + v.val = 1; + break; + + case NUMBER: + n = 0; + base = 10; + p = tp->t; + c = p[tp->len]; + p[tp->len] = '\0'; + if (*p=='0') { + base = 8; + if (p[1]=='x' || p[1]=='X') { + base = 16; + p++; + } + p++; + } + for (;; p++) { + if ((i = digit(*p)) < 0) + break; + if (i>=base) + error(WARNING, + "Bad digit in number %t", tp); + n *= base; + n += i; + } + if (n>=0x80000000 && base!=10) + v.type = UNS; + for (; *p; p++) { + if (*p=='u' || *p=='U') + v.type = UNS; + else if (*p=='l' || *p=='L') + ; + else { + error(ERROR, + "Bad number %t in #if/#elsif", tp); + break; + } + } + v.val = n; + tp->t[tp->len] = c; + break; + + case CCON: + n = 0; + p = tp->t; + if (*p=='L') { + p += 1; + error(WARNING, "Wide char constant value undefined"); + } + p += 1; + if (*p=='\\') { + p += 1; + if ((i = digit(*p))>=0 && i<=7) { + n = i; + p += 1; + if ((i = digit(*p))>=0 && i<=7) { + p += 1; + n <<= 3; + n += i; + if ((i = digit(*p))>=0 && i<=7) { + p += 1; + n <<= 3; + n += i; + } + } + } else if (*p=='x') { + p += 1; + while ((i = digit(*p))>=0 && i<=15) { + p += 1; + n <<= 4; + n += i; + } + } else { + static char cvcon[] + = "b\bf\fn\nr\rt\tv\v''\"\"??\\\\"; + for (i=0; i<sizeof(cvcon); i+=2) { + if (*p == cvcon[i]) { + n = cvcon[i+1]; + break; + } + } + p += 1; + if (i>=sizeof(cvcon)) + error(WARNING, + "Undefined escape in character constant"); + } + } else if (*p=='\'') + error(ERROR, "Empty character constant"); + else + n = *p++; + if (*p!='\'') + error(WARNING, "Multibyte character constant undefined"); + else if (n>127) + error(WARNING, "Character constant taken as not signed"); + v.val = n; + break; + + case STRING: + error(ERROR, "String in #if/#elsif"); + break; + } + return v; +} + +int +digit(int i) +{ + if ('0'<=i && i<='9') + i -= '0'; + else if ('a'<=i && i<='f') + i -= 'a'-10; + else if ('A'<=i && i<='F') + i -= 'A'-10; + else + i = -1; + return i; +} diff --git a/src/tools/lcc/cpp/getopt.c b/src/tools/lcc/cpp/getopt.c new file mode 100644 index 0000000..d4cacad --- /dev/null +++ b/src/tools/lcc/cpp/getopt.c @@ -0,0 +1,53 @@ +#include <stdio.h>
+#include <string.h>
+#define EPR fprintf(stderr,
+#define ERR(str, chr) if(opterr){EPR "%s%c\n", str, chr);}
+int opterr = 1;
+int optind = 1;
+int optopt;
+char *optarg;
+
+int
+lcc_getopt (int argc, char *const argv[], const char *opts)
+{
+ static int sp = 1;
+ int c;
+ char *cp;
+
+ if (sp == 1) {
+ if (optind >= argc ||
+ argv[optind][0] != '-' || argv[optind][1] == '\0')
+ return -1;
+ else if (strcmp(argv[optind], "--") == 0) {
+ optind++;
+ return -1;
+ }
+ }
+ optopt = c = argv[optind][sp];
+ if (c == ':' || (cp=strchr(opts, c)) == 0) {
+ ERR (": illegal option -- ", c);
+ if (argv[optind][++sp] == '\0') {
+ optind++;
+ sp = 1;
+ }
+ return '?';
+ }
+ if (*++cp == ':') {
+ if (argv[optind][sp+1] != '\0')
+ optarg = &argv[optind++][sp+1];
+ else if (++optind >= argc) {
+ ERR (": option requires an argument -- ", c);
+ sp = 1;
+ return '?';
+ } else
+ optarg = argv[optind++];
+ sp = 1;
+ } else {
+ if (argv[optind][++sp] == '\0') {
+ sp = 1;
+ optind++;
+ }
+ optarg = 0;
+ }
+ return c;
+}
diff --git a/src/tools/lcc/cpp/hideset.c b/src/tools/lcc/cpp/hideset.c new file mode 100644 index 0000000..bd2540d --- /dev/null +++ b/src/tools/lcc/cpp/hideset.c @@ -0,0 +1,112 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "cpp.h" + +/* + * A hideset is a null-terminated array of Nlist pointers. + * They are referred to by indices in the hidesets array. + * Hideset 0 is empty. + */ + +#define HSSIZ 32 +typedef Nlist **Hideset; +Hideset *hidesets; +int nhidesets = 0; +int maxhidesets = 3; +int inserths(Hideset, Hideset, Nlist *); + +/* + * Test for membership in a hideset + */ +int +checkhideset(int hs, Nlist *np) +{ + Hideset hsp; + + if (hs>=nhidesets) + abort(); + for (hsp = hidesets[hs]; *hsp; hsp++) { + if (*hsp == np) + return 1; + } + return 0; +} + +/* + * Return the (possibly new) hideset obtained by adding np to hs. + */ +int +newhideset(int hs, Nlist *np) +{ + int i, len; + Nlist *nhs[HSSIZ+3]; + Hideset hs1, hs2; + + len = inserths(nhs, hidesets[hs], np); + for (i=0; i<nhidesets; i++) { + for (hs1=nhs, hs2=hidesets[i]; *hs1==*hs2; hs1++, hs2++) + if (*hs1 == NULL) + return i; + } + if (len>=HSSIZ) + return hs; + if (nhidesets >= maxhidesets) { + maxhidesets = 3*maxhidesets/2+1; + hidesets = (Hideset *)realloc(hidesets, (sizeof (Hideset *))*maxhidesets); + if (hidesets == NULL) + error(FATAL, "Out of memory from realloc"); + } + hs1 = (Hideset)domalloc(len*sizeof(Hideset)); + memmove(hs1, nhs, len*sizeof(Hideset)); + hidesets[nhidesets] = hs1; + return nhidesets++; +} + +int +inserths(Hideset dhs, Hideset shs, Nlist *np) +{ + Hideset odhs = dhs; + + while (*shs && *shs < np) + *dhs++ = *shs++; + if (*shs != np) + *dhs++ = np; + do { + *dhs++ = *shs; + } while (*shs++); + return dhs - odhs; +} + +/* + * Hideset union + */ +int +unionhideset(int hs1, int hs2) +{ + Hideset hp; + + for (hp = hidesets[hs2]; *hp; hp++) + hs1 = newhideset(hs1, *hp); + return hs1; +} + +void +iniths(void) +{ + hidesets = (Hideset *)domalloc(maxhidesets*sizeof(Hideset *)); + hidesets[0] = (Hideset)domalloc(sizeof(Hideset)); + *hidesets[0] = NULL; + nhidesets++; +} + +void +prhideset(int hs) +{ + Hideset np; + + for (np = hidesets[hs]; *np; np++) { + fprintf(stderr, (char*)(*np)->name, (*np)->len); + fprintf(stderr, " "); + } +} diff --git a/src/tools/lcc/cpp/include.c b/src/tools/lcc/cpp/include.c new file mode 100644 index 0000000..1bb8847 --- /dev/null +++ b/src/tools/lcc/cpp/include.c @@ -0,0 +1,154 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "cpp.h" + +Includelist includelist[NINCLUDE]; + +extern char *objname; + +void appendDirToIncludeList( char *dir ) +{ + int i; + char *fqdir; + + fqdir = (char *)newstring( (uchar *)includelist[NINCLUDE-1].file, 256, 0 ); + strcat( fqdir, "/" ); + strcat( fqdir, dir ); + + //avoid adding it more than once + for (i=NINCLUDE-2; i>=0; i--) { + if (includelist[i].file && + !strcmp (includelist[i].file, fqdir)) { + return; + } + } + + for (i=NINCLUDE-2; i>=0; i--) { + if (includelist[i].file==NULL) { + includelist[i].always = 1; + includelist[i].file = fqdir; + break; + } + } + if (i<0) + error(FATAL, "Too many -I directives"); +} + +void +doinclude(Tokenrow *trp) +{ + char fname[256], iname[256]; + Includelist *ip; + int angled, len, fd, i; + + trp->tp += 1; + if (trp->tp>=trp->lp) + goto syntax; + if (trp->tp->type!=STRING && trp->tp->type!=LT) { + len = trp->tp - trp->bp; + expandrow(trp, "<include>"); + trp->tp = trp->bp+len; + } + if (trp->tp->type==STRING) { + len = trp->tp->len-2; + if (len > sizeof(fname) - 1) + len = sizeof(fname) - 1; + strncpy(fname, (char*)trp->tp->t+1, len); + angled = 0; + } else if (trp->tp->type==LT) { + len = 0; + trp->tp++; + while (trp->tp->type!=GT) { + if (trp->tp>trp->lp || len+trp->tp->len+2 >= sizeof(fname)) + goto syntax; + strncpy(fname+len, (char*)trp->tp->t, trp->tp->len); + len += trp->tp->len; + trp->tp++; + } + angled = 1; + } else + goto syntax; + trp->tp += 2; + if (trp->tp < trp->lp || len==0) + goto syntax; + fname[len] = '\0'; + + appendDirToIncludeList( basepath( fname ) ); + + if (fname[0]=='/') { + fd = open(fname, 0); + strcpy(iname, fname); + } else for (fd = -1,i=NINCLUDE-1; i>=0; i--) { + ip = &includelist[i]; + if (ip->file==NULL || ip->deleted || (angled && ip->always==0)) + continue; + if (strlen(fname)+strlen(ip->file)+2 > sizeof(iname)) + continue; + strcpy(iname, ip->file); + strcat(iname, "/"); + strcat(iname, fname); + if ((fd = open(iname, 0)) >= 0) + break; + } + if ( Mflag>1 || (!angled&&Mflag==1) ) { + write(1,objname,strlen(objname)); + write(1,iname,strlen(iname)); + write(1,"\n",1); + } + if (fd >= 0) { + if (++incdepth > 10) + error(FATAL, "#include too deeply nested"); + setsource((char*)newstring((uchar*)iname, strlen(iname), 0), fd, NULL); + genline(); + } else { + trp->tp = trp->bp+2; + error(ERROR, "Could not find include file %r", trp); + } + return; +syntax: + error(ERROR, "Syntax error in #include"); + return; +} + +/* + * Generate a line directive for cursource + */ +void +genline(void) +{ + static Token ta = { UNCLASS }; + static Tokenrow tr = { &ta, &ta, &ta+1, 1 }; + uchar *p; + + ta.t = p = (uchar*)outp; + strcpy((char*)p, "#line "); + p += sizeof("#line ")-1; + p = (uchar*)outnum((char*)p, cursource->line); + *p++ = ' '; *p++ = '"'; + if (cursource->filename[0]!='/' && wd[0]) { + strcpy((char*)p, wd); + p += strlen(wd); + *p++ = '/'; + } + strcpy((char*)p, cursource->filename); + p += strlen((char*)p); + *p++ = '"'; *p++ = '\n'; + ta.len = (char*)p-outp; + outp = (char*)p; + tr.tp = tr.bp; + puttokens(&tr); +} + +void +setobjname(char *f) +{ + int n = strlen(f); + objname = (char*)domalloc(n+5); + strcpy(objname,f); + if(objname[n-2]=='.'){ + strcpy(objname+n-1,"$O: "); + }else{ + strcpy(objname+n,"$O: "); + } +} diff --git a/src/tools/lcc/cpp/lex.c b/src/tools/lcc/cpp/lex.c new file mode 100644 index 0000000..8030354 --- /dev/null +++ b/src/tools/lcc/cpp/lex.c @@ -0,0 +1,580 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "cpp.h" + +/* + * lexical FSM encoding + * when in state state, and one of the characters + * in ch arrives, enter nextstate. + * States >= S_SELF are either final, or at least require special action. + * In 'fsm' there is a line for each state X charset X nextstate. + * List chars that overwrite previous entries later (e.g. C_ALPH + * can be overridden by '_' by a later entry; and C_XX is the + * the universal set, and should always be first. + * States above S_SELF are represented in the big table as negative values. + * S_SELF and S_SELFB encode the resulting token type in the upper bits. + * These actions differ in that S_SELF doesn't have a lookahead char, + * S_SELFB does. + * + * The encoding is blown out into a big table for time-efficiency. + * Entries have + * nextstate: 6 bits; ?\ marker: 1 bit; tokentype: 9 bits. + */ + +#define MAXSTATE 32 +#define ACT(tok,act) ((tok<<7)+act) +#define QBSBIT 0100 +#define GETACT(st) (st>>7)&0x1ff + +/* character classes */ +#define C_WS 1 +#define C_ALPH 2 +#define C_NUM 3 +#define C_EOF 4 +#define C_XX 5 + +enum state { + START=0, NUM1, NUM2, NUM3, ID1, ST1, ST2, ST3, COM1, COM2, COM3, COM4, + CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1, + CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1, + S_SELF=MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR, + S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME +}; + +int tottok; +int tokkind[256]; +struct fsm { + int state; /* if in this state */ + uchar ch[4]; /* and see one of these characters */ + int nextstate; /* enter this state if +ve */ +}; + +/*const*/ struct fsm fsm[] = { + /* start state */ + {START, { C_XX }, ACT(UNCLASS,S_SELF)}, + {START, { ' ', '\t', '\v' }, WS1}, + {START, { C_NUM }, NUM1}, + {START, { '.' }, NUM3}, + {START, { C_ALPH }, ID1}, + {START, { 'L' }, ST1}, + {START, { '"' }, ST2}, + {START, { '\'' }, CC1}, + {START, { '/' }, COM1}, + {START, { EOFC }, S_EOF}, + {START, { '\n' }, S_NL}, + {START, { '-' }, MINUS1}, + {START, { '+' }, PLUS1}, + {START, { '<' }, LT1}, + {START, { '>' }, GT1}, + {START, { '=' }, ASG1}, + {START, { '!' }, NOT1}, + {START, { '&' }, AND1}, + {START, { '|' }, OR1}, + {START, { '#' }, SHARP1}, + {START, { '%' }, PCT1}, + {START, { '[' }, ACT(SBRA,S_SELF)}, + {START, { ']' }, ACT(SKET,S_SELF)}, + {START, { '(' }, ACT(LP,S_SELF)}, + {START, { ')' }, ACT(RP,S_SELF)}, + {START, { '*' }, STAR1}, + {START, { ',' }, ACT(COMMA,S_SELF)}, + {START, { '?' }, ACT(QUEST,S_SELF)}, + {START, { ':' }, ACT(COLON,S_SELF)}, + {START, { ';' }, ACT(SEMIC,S_SELF)}, + {START, { '{' }, ACT(CBRA,S_SELF)}, + {START, { '}' }, ACT(CKET,S_SELF)}, + {START, { '~' }, ACT(TILDE,S_SELF)}, + {START, { '^' }, CIRC1}, + + /* saw a digit */ + {NUM1, { C_XX }, ACT(NUMBER,S_SELFB)}, + {NUM1, { C_NUM, C_ALPH, '.' }, NUM1}, + {NUM1, { 'E', 'e' }, NUM2}, + {NUM1, { '_' }, ACT(NUMBER,S_SELFB)}, + + /* saw possible start of exponent, digits-e */ + {NUM2, { C_XX }, ACT(NUMBER,S_SELFB)}, + {NUM2, { '+', '-' }, NUM1}, + {NUM2, { C_NUM, C_ALPH }, NUM1}, + {NUM2, { '_' }, ACT(NUMBER,S_SELFB)}, + + /* saw a '.', which could be a number or an operator */ + {NUM3, { C_XX }, ACT(DOT,S_SELFB)}, + {NUM3, { '.' }, DOTS1}, + {NUM3, { C_NUM }, NUM1}, + + {DOTS1, { C_XX }, ACT(UNCLASS, S_SELFB)}, + {DOTS1, { C_NUM }, NUM1}, + {DOTS1, { '.' }, ACT(ELLIPS, S_SELF)}, + + /* saw a letter or _ */ + {ID1, { C_XX }, ACT(NAME,S_NAME)}, + {ID1, { C_ALPH, C_NUM }, ID1}, + + /* saw L (start of wide string?) */ + {ST1, { C_XX }, ACT(NAME,S_NAME)}, + {ST1, { C_ALPH, C_NUM }, ID1}, + {ST1, { '"' }, ST2}, + {ST1, { '\'' }, CC1}, + + /* saw " beginning string */ + {ST2, { C_XX }, ST2}, + {ST2, { '"' }, ACT(STRING, S_SELF)}, + {ST2, { '\\' }, ST3}, + {ST2, { '\n' }, S_STNL}, + {ST2, { EOFC }, S_EOFSTR}, + + /* saw \ in string */ + {ST3, { C_XX }, ST2}, + {ST3, { '\n' }, S_STNL}, + {ST3, { EOFC }, S_EOFSTR}, + + /* saw ' beginning character const */ + {CC1, { C_XX }, CC1}, + {CC1, { '\'' }, ACT(CCON, S_SELF)}, + {CC1, { '\\' }, CC2}, + {CC1, { '\n' }, S_STNL}, + {CC1, { EOFC }, S_EOFSTR}, + + /* saw \ in ccon */ + {CC2, { C_XX }, CC1}, + {CC2, { '\n' }, S_STNL}, + {CC2, { EOFC }, S_EOFSTR}, + + /* saw /, perhaps start of comment */ + {COM1, { C_XX }, ACT(SLASH, S_SELFB)}, + {COM1, { '=' }, ACT(ASSLASH, S_SELF)}, + {COM1, { '*' }, COM2}, + {COM1, { '/' }, COM4}, + + /* saw / then *, start of comment */ + {COM2, { C_XX }, COM2}, + {COM2, { '\n' }, S_COMNL}, + {COM2, { '*' }, COM3}, + {COM2, { EOFC }, S_EOFCOM}, + + /* saw the * possibly ending a comment */ + {COM3, { C_XX }, COM2}, + {COM3, { '\n' }, S_COMNL}, + {COM3, { '*' }, COM3}, + {COM3, { '/' }, S_COMMENT}, + + /* // comment */ + {COM4, { C_XX }, COM4}, + {COM4, { '\n' }, S_NL}, + {COM4, { EOFC }, S_EOFCOM}, + + /* saw white space, eat it up */ + {WS1, { C_XX }, S_WS}, + {WS1, { ' ', '\t', '\v' }, WS1}, + + /* saw -, check --, -=, -> */ + {MINUS1, { C_XX }, ACT(MINUS, S_SELFB)}, + {MINUS1, { '-' }, ACT(MMINUS, S_SELF)}, + {MINUS1, { '=' }, ACT(ASMINUS,S_SELF)}, + {MINUS1, { '>' }, ACT(ARROW,S_SELF)}, + + /* saw +, check ++, += */ + {PLUS1, { C_XX }, ACT(PLUS, S_SELFB)}, + {PLUS1, { '+' }, ACT(PPLUS, S_SELF)}, + {PLUS1, { '=' }, ACT(ASPLUS, S_SELF)}, + + /* saw <, check <<, <<=, <= */ + {LT1, { C_XX }, ACT(LT, S_SELFB)}, + {LT1, { '<' }, LT2}, + {LT1, { '=' }, ACT(LEQ, S_SELF)}, + {LT2, { C_XX }, ACT(LSH, S_SELFB)}, + {LT2, { '=' }, ACT(ASLSH, S_SELF)}, + + /* saw >, check >>, >>=, >= */ + {GT1, { C_XX }, ACT(GT, S_SELFB)}, + {GT1, { '>' }, GT2}, + {GT1, { '=' }, ACT(GEQ, S_SELF)}, + {GT2, { C_XX }, ACT(RSH, S_SELFB)}, + {GT2, { '=' }, ACT(ASRSH, S_SELF)}, + + /* = */ + {ASG1, { C_XX }, ACT(ASGN, S_SELFB)}, + {ASG1, { '=' }, ACT(EQ, S_SELF)}, + + /* ! */ + {NOT1, { C_XX }, ACT(NOT, S_SELFB)}, + {NOT1, { '=' }, ACT(NEQ, S_SELF)}, + + /* & */ + {AND1, { C_XX }, ACT(AND, S_SELFB)}, + {AND1, { '&' }, ACT(LAND, S_SELF)}, + {AND1, { '=' }, ACT(ASAND, S_SELF)}, + + /* | */ + {OR1, { C_XX }, ACT(OR, S_SELFB)}, + {OR1, { '|' }, ACT(LOR, S_SELF)}, + {OR1, { '=' }, ACT(ASOR, S_SELF)}, + + /* # */ + {SHARP1, { C_XX }, ACT(SHARP, S_SELFB)}, + {SHARP1, { '#' }, ACT(DSHARP, S_SELF)}, + + /* % */ + {PCT1, { C_XX }, ACT(PCT, S_SELFB)}, + {PCT1, { '=' }, ACT(ASPCT, S_SELF)}, + + /* * */ + {STAR1, { C_XX }, ACT(STAR, S_SELFB)}, + {STAR1, { '=' }, ACT(ASSTAR, S_SELF)}, + + /* ^ */ + {CIRC1, { C_XX }, ACT(CIRC, S_SELFB)}, + {CIRC1, { '=' }, ACT(ASCIRC, S_SELF)}, + + {-1} +}; + +/* first index is char, second is state */ +/* increase #states to power of 2 to encourage use of shift */ +short bigfsm[256][MAXSTATE]; + +void +expandlex(void) +{ + /*const*/ struct fsm *fp; + int i, j, nstate; + + for (fp = fsm; fp->state>=0; fp++) { + for (i=0; fp->ch[i]; i++) { + nstate = fp->nextstate; + if (nstate >= S_SELF) + nstate = ~nstate; + switch (fp->ch[i]) { + + case C_XX: /* random characters */ + for (j=0; j<256; j++) + bigfsm[j][fp->state] = nstate; + continue; + case C_ALPH: + for (j=0; j<=256; j++) + if (('a'<=j&&j<='z') || ('A'<=j&&j<='Z') + || j=='_') + bigfsm[j][fp->state] = nstate; + continue; + case C_NUM: + for (j='0'; j<='9'; j++) + bigfsm[j][fp->state] = nstate; + continue; + default: + bigfsm[fp->ch[i]][fp->state] = nstate; + } + } + } + /* install special cases for ? (trigraphs), \ (splicing), runes, and EOB */ + for (i=0; i<MAXSTATE; i++) { + for (j=0; j<0xFF; j++) + if (j=='?' || j=='\\') { + if (bigfsm[j][i]>0) + bigfsm[j][i] = ~bigfsm[j][i]; + bigfsm[j][i] &= ~QBSBIT; + } + bigfsm[EOB][i] = ~S_EOB; + if (bigfsm[EOFC][i]>=0) + bigfsm[EOFC][i] = ~S_EOF; + } +} + +void +fixlex(void) +{ + /* do C++ comments? */ + if (Cplusplus==0) + bigfsm['/'][COM1] = bigfsm['x'][COM1]; +} + +/* + * fill in a row of tokens from input, terminated by NL or END + * First token is put at trp->lp. + * Reset is non-zero when the input buffer can be "rewound." + * The value is a flag indicating that possible macros have + * been seen in the row. + */ +int +gettokens(Tokenrow *trp, int reset) +{ + register int c, state, oldstate; + register uchar *ip; + register Token *tp, *maxp; + int runelen; + Source *s = cursource; + int nmac = 0; + + tp = trp->lp; + ip = s->inp; + if (reset) { + s->lineinc = 0; + if (ip>=s->inl) { /* nothing in buffer */ + s->inl = s->inb; + fillbuf(s); + ip = s->inp = s->inb; + } else if (ip >= s->inb+(3*INS/4)) { + memmove(s->inb, ip, 4+s->inl-ip); + s->inl = s->inb+(s->inl-ip); + ip = s->inp = s->inb; + } + } + maxp = &trp->bp[trp->max]; + runelen = 1; + for (;;) { + continue2: + if (tp>=maxp) { + trp->lp = tp; + tp = growtokenrow(trp); + maxp = &trp->bp[trp->max]; + } + tp->type = UNCLASS; + tp->hideset = 0; + tp->t = ip; + tp->wslen = 0; + tp->flag = 0; + state = START; + for (;;) { + oldstate = state; + c = *ip; + if ((state = bigfsm[c][state]) >= 0) { + ip += runelen; + runelen = 1; + continue; + } + state = ~state; + reswitch: + switch (state&0177) { + case S_SELF: + ip += runelen; + runelen = 1; + case S_SELFB: + tp->type = GETACT(state); + tp->len = ip - tp->t; + tp++; + goto continue2; + + case S_NAME: /* like S_SELFB but with nmac check */ + tp->type = NAME; + tp->len = ip - tp->t; + nmac |= quicklook(tp->t[0], tp->len>1?tp->t[1]:0); + tp++; + goto continue2; + + case S_WS: + tp->wslen = ip - tp->t; + tp->t = ip; + state = START; + continue; + + default: + if ((state&QBSBIT)==0) { + ip += runelen; + runelen = 1; + continue; + } + state &= ~QBSBIT; + s->inp = ip; + if (c=='?') { /* check trigraph */ + if (trigraph(s)) { + state = oldstate; + continue; + } + goto reswitch; + } + if (c=='\\') { /* line-folding */ + if (foldline(s)) { + s->lineinc++; + state = oldstate; + continue; + } + goto reswitch; + } + error(WARNING, "Lexical botch in cpp"); + ip += runelen; + runelen = 1; + continue; + + case S_EOB: + s->inp = ip; + fillbuf(cursource); + state = oldstate; + continue; + + case S_EOF: + tp->type = END; + tp->len = 0; + s->inp = ip; + if (tp!=trp->bp && (tp-1)->type!=NL && cursource->fd!=-1) + error(WARNING,"No newline at end of file"); + trp->lp = tp+1; + return nmac; + + case S_STNL: + error(ERROR, "Unterminated string or char const"); + case S_NL: + tp->t = ip; + tp->type = NL; + tp->len = 1; + tp->wslen = 0; + s->lineinc++; + s->inp = ip+1; + trp->lp = tp+1; + return nmac; + + case S_EOFSTR: + error(FATAL, "EOF in string or char constant"); + break; + + case S_COMNL: + s->lineinc++; + state = COM2; + ip += runelen; + runelen = 1; + if (ip >= s->inb+(7*INS/8)) { /* very long comment */ + memmove(tp->t, ip, 4+s->inl-ip); + s->inl -= ip-tp->t; + ip = tp->t+1; + } + continue; + + case S_EOFCOM: + error(WARNING, "EOF inside comment"); + --ip; + case S_COMMENT: + ++ip; + tp->t = ip; + tp->t[-1] = ' '; + tp->wslen = 1; + state = START; + continue; + } + break; + } + ip += runelen; + runelen = 1; + tp->len = ip - tp->t; + tp++; + } +} + +/* have seen ?; handle the trigraph it starts (if any) else 0 */ +int +trigraph(Source *s) +{ + int c; + + while (s->inp+2 >= s->inl && fillbuf(s)!=EOF) + ; + if (s->inp[1]!='?') + return 0; + c = 0; + switch(s->inp[2]) { + case '=': + c = '#'; break; + case '(': + c = '['; break; + case '/': + c = '\\'; break; + case ')': + c = ']'; break; + case '\'': + c = '^'; break; + case '<': + c = '{'; break; + case '!': + c = '|'; break; + case '>': + c = '}'; break; + case '-': + c = '~'; break; + } + if (c) { + *s->inp = c; + memmove(s->inp+1, s->inp+3, s->inl-s->inp+2); + s->inl -= 2; + } + return c; +} + +int +foldline(Source *s) +{ + while (s->inp+1 >= s->inl && fillbuf(s)!=EOF) + ; + if (s->inp[1] == '\n') { + memmove(s->inp, s->inp+2, s->inl-s->inp+3); + s->inl -= 2; + return 1; + } + return 0; +} + +int +fillbuf(Source *s) +{ + int n, nr; + + nr = INS/8; + if ((char *)s->inl+nr > (char *)s->inb+INS) + error(FATAL, "Input buffer overflow"); + if (s->fd<0 || (n=read(s->fd, (char *)s->inl, INS/8)) <= 0) + n = 0; + if ((*s->inp&0xff) == EOB) /* sentinel character appears in input */ + *s->inp = EOFC; + s->inl += n; + s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOB; + if (n==0) { + s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOFC; + return EOF; + } + return 0; +} + +/* + * Push down to new source of characters. + * If fd>0 and str==NULL, then from a file `name'; + * if fd==-1 and str, then from the string. + */ +Source * +setsource(char *name, int fd, char *str) +{ + Source *s = new(Source); + int len; + + s->line = 1; + s->lineinc = 0; + s->fd = fd; + s->filename = name; + s->next = cursource; + s->ifdepth = 0; + cursource = s; + /* slop at right for EOB */ + if (str) { + len = strlen(str); + s->inb = domalloc(len+4); + s->inp = s->inb; + strncpy((char *)s->inp, str, len); + } else { + s->inb = domalloc(INS+4); + s->inp = s->inb; + len = 0; + } + s->inl = s->inp+len; + s->inl[0] = s->inl[1] = EOB; + return s; +} + +void +unsetsource(void) +{ + Source *s = cursource; + + if (s->fd>=0) { + close(s->fd); + dofree(s->inb); + } + cursource = s->next; + dofree(s); +} diff --git a/src/tools/lcc/cpp/macro.c b/src/tools/lcc/cpp/macro.c new file mode 100644 index 0000000..49d1129 --- /dev/null +++ b/src/tools/lcc/cpp/macro.c @@ -0,0 +1,515 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "cpp.h" + +/* + * do a macro definition. tp points to the name being defined in the line + */ +void +dodefine(Tokenrow *trp) +{ + Token *tp; + Nlist *np; + Tokenrow *def, *args; + + tp = trp->tp+1; + if (tp>=trp->lp || tp->type!=NAME) { + error(ERROR, "#defined token is not a name"); + return; + } + np = lookup(tp, 1); + if (np->flag&ISUNCHANGE) { + error(ERROR, "#defined token %t can't be redefined", tp); + return; + } + /* collect arguments */ + tp += 1; + args = NULL; + if (tp<trp->lp && tp->type==LP && tp->wslen==0) { + /* macro with args */ + int narg = 0; + tp += 1; + args = new(Tokenrow); + maketokenrow(2, args); + if (tp->type!=RP) { + int err = 0; + for (;;) { + Token *atp; + if (tp->type!=NAME) { + err++; + break; + } + if (narg>=args->max) + growtokenrow(args); + for (atp=args->bp; atp<args->lp; atp++) + if (atp->len==tp->len + && strncmp((char*)atp->t, (char*)tp->t, tp->len)==0) + error(ERROR, "Duplicate macro argument"); + *args->lp++ = *tp; + narg++; + tp += 1; + if (tp->type==RP) + break; + if (tp->type!=COMMA) { + err++; + break; + } + tp += 1; + } + if (err) { + error(ERROR, "Syntax error in macro parameters"); + return; + } + } + tp += 1; + } + trp->tp = tp; + if (((trp->lp)-1)->type==NL) + trp->lp -= 1; + def = normtokenrow(trp); + if (np->flag&ISDEFINED) { + if (comparetokens(def, np->vp) + || (np->ap==NULL) != (args==NULL) + || (np->ap && comparetokens(args, np->ap))) + error(ERROR, "Macro redefinition of %t", trp->bp+2); + } + if (args) { + Tokenrow *tap; + tap = normtokenrow(args); + dofree(args->bp); + args = tap; + } + np->ap = args; + np->vp = def; + np->flag |= ISDEFINED; +} + +/* + * Definition received via -D or -U + */ +void +doadefine(Tokenrow *trp, int type) +{ + Nlist *np; + static Token onetoken[1] = {{ NUMBER, 0, 0, 0, 1, (uchar*)"1" }}; + static Tokenrow onetr = { onetoken, onetoken, onetoken+1, 1 }; + + trp->tp = trp->bp; + if (type=='U') { + if (trp->lp-trp->tp != 2 || trp->tp->type!=NAME) + goto syntax; + if ((np = lookup(trp->tp, 0)) == NULL) + return; + np->flag &= ~ISDEFINED; + return; + } + if (trp->tp >= trp->lp || trp->tp->type!=NAME) + goto syntax; + np = lookup(trp->tp, 1); + np->flag |= ISDEFINED; + trp->tp += 1; + if (trp->tp >= trp->lp || trp->tp->type==END) { + np->vp = &onetr; + return; + } + if (trp->tp->type!=ASGN) + goto syntax; + trp->tp += 1; + if ((trp->lp-1)->type == END) + trp->lp -= 1; + np->vp = normtokenrow(trp); + return; +syntax: + error(FATAL, "Illegal -D or -U argument %r", trp); +} + +/* + * Do macro expansion in a row of tokens. + * Flag is NULL if more input can be gathered. + */ +void +expandrow(Tokenrow *trp, char *flag) +{ + Token *tp; + Nlist *np; + + if (flag) + setsource(flag, -1, ""); + for (tp = trp->tp; tp<trp->lp; ) { + if (tp->type!=NAME + || quicklook(tp->t[0], tp->len>1?tp->t[1]:0)==0 + || (np = lookup(tp, 0))==NULL + || (np->flag&(ISDEFINED|ISMAC))==0 + || (tp->hideset && checkhideset(tp->hideset, np))) { + tp++; + continue; + } + trp->tp = tp; + if (np->val==KDEFINED) { + tp->type = DEFINED; + if ((tp+1)<trp->lp && (tp+1)->type==NAME) + (tp+1)->type = NAME1; + else if ((tp+3)<trp->lp && (tp+1)->type==LP + && (tp+2)->type==NAME && (tp+3)->type==RP) + (tp+2)->type = NAME1; + else + error(ERROR, "Incorrect syntax for `defined'"); + tp++; + continue; + } + if (np->flag&ISMAC) + builtin(trp, np->val); + else { + expand(trp, np); + } + tp = trp->tp; + } + if (flag) + unsetsource(); +} + +/* + * Expand the macro whose name is np, at token trp->tp, in the tokenrow. + * Return trp->tp at the first token next to be expanded + * (ordinarily the beginning of the expansion) + */ +void +expand(Tokenrow *trp, Nlist *np) +{ + Tokenrow ntr; + int ntokc, narg, i; + Token *tp; + Tokenrow *atr[NARG+1]; + int hs; + + copytokenrow(&ntr, np->vp); /* copy macro value */ + if (np->ap==NULL) /* parameterless */ + ntokc = 1; + else { + ntokc = gatherargs(trp, atr, &narg); + if (narg<0) { /* not actually a call (no '(') */ + trp->tp++; + return; + } + if (narg != rowlen(np->ap)) { + error(ERROR, "Disagreement in number of macro arguments"); + trp->tp->hideset = newhideset(trp->tp->hideset, np); + trp->tp += ntokc; + return; + } + substargs(np, &ntr, atr); /* put args into replacement */ + for (i=0; i<narg; i++) { + dofree(atr[i]->bp); + dofree(atr[i]); + } + } + doconcat(&ntr); /* execute ## operators */ + hs = newhideset(trp->tp->hideset, np); + for (tp=ntr.bp; tp<ntr.lp; tp++) { /* distribute hidesets */ + if (tp->type==NAME) { + if (tp->hideset==0) + tp->hideset = hs; + else + tp->hideset = unionhideset(tp->hideset, hs); + } + } + ntr.tp = ntr.bp; + insertrow(trp, ntokc, &ntr); + trp->tp -= rowlen(&ntr); + dofree(ntr.bp); + return; +} + +/* + * Gather an arglist, starting in trp with tp pointing at the macro name. + * Return total number of tokens passed, stash number of args found. + * trp->tp is not changed relative to the tokenrow. + */ +int +gatherargs(Tokenrow *trp, Tokenrow **atr, int *narg) +{ + int parens = 1; + int ntok = 0; + Token *bp, *lp; + Tokenrow ttr; + int ntokp; + int needspace; + + *narg = -1; /* means that there is no macro call */ + /* look for the ( */ + for (;;) { + trp->tp++; + ntok++; + if (trp->tp >= trp->lp) { + gettokens(trp, 0); + if ((trp->lp-1)->type==END) { + trp->lp -= 1; + trp->tp -= ntok; + return ntok; + } + } + if (trp->tp->type==LP) + break; + if (trp->tp->type!=NL) + return ntok; + } + *narg = 0; + ntok++; + ntokp = ntok; + trp->tp++; + /* search for the terminating ), possibly extending the row */ + needspace = 0; + while (parens>0) { + if (trp->tp >= trp->lp) + gettokens(trp, 0); + if (needspace) { + needspace = 0; + makespace(trp); + } + if (trp->tp->type==END) { + trp->lp -= 1; + trp->tp -= ntok; + error(ERROR, "EOF in macro arglist"); + return ntok; + } + if (trp->tp->type==NL) { + trp->tp += 1; + adjustrow(trp, -1); + trp->tp -= 1; + makespace(trp); + needspace = 1; + continue; + } + if (trp->tp->type==LP) + parens++; + else if (trp->tp->type==RP) + parens--; + trp->tp++; + ntok++; + } + trp->tp -= ntok; + /* Now trp->tp won't move underneath us */ + lp = bp = trp->tp+ntokp; + for (; parens>=0; lp++) { + if (lp->type == LP) { + parens++; + continue; + } + if (lp->type==RP) + parens--; + if (lp->type==DSHARP) + lp->type = DSHARP1; /* ## not special in arg */ + if ((lp->type==COMMA && parens==0) || (parens<0 && (lp-1)->type!=LP)) { + if (*narg>=NARG-1) + error(FATAL, "Sorry, too many macro arguments"); + ttr.bp = ttr.tp = bp; + ttr.lp = lp; + atr[(*narg)++] = normtokenrow(&ttr); + bp = lp+1; + } + } + return ntok; +} + +/* + * substitute the argument list into the replacement string + * This would be simple except for ## and # + */ +void +substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr) +{ + Tokenrow tatr; + Token *tp; + int ntok, argno; + + for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) { + if (rtr->tp->type==SHARP) { /* string operator */ + tp = rtr->tp; + rtr->tp += 1; + if ((argno = lookuparg(np, rtr->tp))<0) { + error(ERROR, "# not followed by macro parameter"); + continue; + } + ntok = 1 + (rtr->tp - tp); + rtr->tp = tp; + insertrow(rtr, ntok, stringify(atr[argno])); + continue; + } + if (rtr->tp->type==NAME + && (argno = lookuparg(np, rtr->tp)) >= 0) { + if ((rtr->tp+1)->type==DSHARP + || (rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)) + insertrow(rtr, 1, atr[argno]); + else { + copytokenrow(&tatr, atr[argno]); + expandrow(&tatr, "<macro>"); + insertrow(rtr, 1, &tatr); + dofree(tatr.bp); + } + continue; + } + rtr->tp++; + } +} + +/* + * Evaluate the ## operators in a tokenrow + */ +void +doconcat(Tokenrow *trp) +{ + Token *ltp, *ntp; + Tokenrow ntr; + int len; + + for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) { + if (trp->tp->type==DSHARP1) + trp->tp->type = DSHARP; + else if (trp->tp->type==DSHARP) { + char tt[128]; + ltp = trp->tp-1; + ntp = trp->tp+1; + if (ltp<trp->bp || ntp>=trp->lp) { + error(ERROR, "## occurs at border of replacement"); + continue; + } + len = ltp->len + ntp->len; + strncpy((char*)tt, (char*)ltp->t, ltp->len); + strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len); + tt[len] = '\0'; + setsource("<##>", -1, tt); + maketokenrow(3, &ntr); + gettokens(&ntr, 1); + unsetsource(); + if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS) + error(WARNING, "Bad token %r produced by ##", &ntr); + ntr.lp = ntr.bp+1; + trp->tp = ltp; + makespace(&ntr); + insertrow(trp, (ntp-ltp)+1, &ntr); + dofree(ntr.bp); + trp->tp--; + } + } +} + +/* + * tp is a potential parameter name of macro mac; + * look it up in mac's arglist, and if found, return the + * corresponding index in the argname array. Return -1 if not found. + */ +int +lookuparg(Nlist *mac, Token *tp) +{ + Token *ap; + + if (tp->type!=NAME || mac->ap==NULL) + return -1; + for (ap=mac->ap->bp; ap<mac->ap->lp; ap++) { + if (ap->len==tp->len && strncmp((char*)ap->t,(char*)tp->t,ap->len)==0) + return ap - mac->ap->bp; + } + return -1; +} + +/* + * Return a quoted version of the tokenrow (from # arg) + */ +#define STRLEN 512 +Tokenrow * +stringify(Tokenrow *vp) +{ + static Token t = { STRING }; + static Tokenrow tr = { &t, &t, &t+1, 1 }; + Token *tp; + uchar s[STRLEN]; + uchar *sp = s, *cp; + int i, instring; + + *sp++ = '"'; + for (tp = vp->bp; tp < vp->lp; tp++) { + instring = tp->type==STRING || tp->type==CCON; + if (sp+2*tp->len >= &s[STRLEN-10]) { + error(ERROR, "Stringified macro arg is too long"); + break; + } + if (tp->wslen && (tp->flag&XPWS)==0) + *sp++ = ' '; + for (i=0, cp=tp->t; i<tp->len; i++) { + if (instring && (*cp=='"' || *cp=='\\')) + *sp++ = '\\'; + *sp++ = *cp++; + } + } + *sp++ = '"'; + *sp = '\0'; + sp = s; + t.len = strlen((char*)sp); + t.t = newstring(sp, t.len, 0); + return &tr; +} + +/* + * expand a builtin name + */ +void +builtin(Tokenrow *trp, int biname) +{ + char *op; + Token *tp; + Source *s; + + tp = trp->tp; + trp->tp++; + /* need to find the real source */ + s = cursource; + while (s && s->fd==-1) + s = s->next; + if (s==NULL) + s = cursource; + /* most are strings */ + tp->type = STRING; + if (tp->wslen) { + *outp++ = ' '; + tp->wslen = 1; + } + op = outp; + *op++ = '"'; + switch (biname) { + + case KLINENO: + tp->type = NUMBER; + op = outnum(op-1, s->line); + break; + + case KFILE: { + char *src = s->filename; + while ((*op++ = *src++) != 0) + if (src[-1] == '\\') + *op++ = '\\'; + op--; + break; + } + + case KDATE: + strncpy(op, curtime+4, 7); + strncpy(op+7, curtime+20, 4); + op += 11; + break; + + case KTIME: + strncpy(op, curtime+11, 8); + op += 8; + break; + + default: + error(ERROR, "cpp botch: unknown internal macro"); + return; + } + if (tp->type==STRING) + *op++ = '"'; + tp->t = (uchar*)outp; + tp->len = op - outp; + outp = op; +} diff --git a/src/tools/lcc/cpp/nlist.c b/src/tools/lcc/cpp/nlist.c new file mode 100644 index 0000000..72089c4 --- /dev/null +++ b/src/tools/lcc/cpp/nlist.c @@ -0,0 +1,103 @@ +#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "cpp.h"
+
+extern char *optarg;
+extern int optind;
+extern int verbose;
+extern int Cplusplus;
+Nlist *kwdefined;
+char wd[128];
+
+#define NLSIZE 128
+
+static Nlist *nlist[NLSIZE];
+
+struct kwtab {
+ char *kw;
+ int val;
+ int flag;
+} kwtab[] = {
+ {"if", KIF, ISKW},
+ {"ifdef", KIFDEF, ISKW},
+ {"ifndef", KIFNDEF, ISKW},
+ {"elif", KELIF, ISKW},
+ {"else", KELSE, ISKW},
+ {"endif", KENDIF, ISKW},
+ {"include", KINCLUDE, ISKW},
+ {"define", KDEFINE, ISKW},
+ {"undef", KUNDEF, ISKW},
+ {"line", KLINE, ISKW},
+ {"error", KERROR, ISKW},
+ {"pragma", KPRAGMA, ISKW},
+ {"eval", KEVAL, ISKW},
+ {"defined", KDEFINED, ISDEFINED+ISUNCHANGE},
+ {"__LINE__", KLINENO, ISMAC+ISUNCHANGE},
+ {"__FILE__", KFILE, ISMAC+ISUNCHANGE},
+ {"__DATE__", KDATE, ISMAC+ISUNCHANGE},
+ {"__TIME__", KTIME, ISMAC+ISUNCHANGE},
+ {"__STDC__", KSTDC, ISUNCHANGE},
+ {NULL}
+};
+
+unsigned long namebit[077+1];
+Nlist *np;
+
+void
+setup_kwtab(void)
+{
+ struct kwtab *kp;
+ Nlist *np;
+ Token t;
+ static Token deftoken[1] = {{ NAME, 0, 0, 0, 7, (uchar*)"defined" }};
+ static Tokenrow deftr = { deftoken, deftoken, deftoken+1, 1 };
+
+ for (kp=kwtab; kp->kw; kp++) {
+ t.t = (uchar*)kp->kw;
+ t.len = strlen(kp->kw);
+ np = lookup(&t, 1);
+ np->flag = kp->flag;
+ np->val = kp->val;
+ if (np->val == KDEFINED) {
+ kwdefined = np;
+ np->val = NAME;
+ np->vp = &deftr;
+ np->ap = 0;
+ }
+ }
+}
+
+Nlist *
+lookup(Token *tp, int install)
+{
+ unsigned int h;
+ Nlist *np;
+ uchar *cp, *cpe;
+
+ h = 0;
+ for (cp=tp->t, cpe=cp+tp->len; cp<cpe; )
+ h += *cp++;
+ h %= NLSIZE;
+ np = nlist[h];
+ while (np) {
+ if (*tp->t==*np->name && tp->len==np->len
+ && strncmp((char*)tp->t, (char*)np->name, tp->len)==0)
+ return np;
+ np = np->next;
+ }
+ if (install) {
+ np = new(Nlist);
+ np->vp = NULL;
+ np->ap = NULL;
+ np->flag = 0;
+ np->val = 0;
+ np->len = tp->len;
+ np->name = newstring(tp->t, tp->len, 0);
+ np->next = nlist[h];
+ nlist[h] = np;
+ quickset(tp->t[0], tp->len>1? tp->t[1]:0);
+ return np;
+ }
+ return NULL;
+}
diff --git a/src/tools/lcc/cpp/tokens.c b/src/tools/lcc/cpp/tokens.c new file mode 100644 index 0000000..147569b --- /dev/null +++ b/src/tools/lcc/cpp/tokens.c @@ -0,0 +1,370 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "cpp.h" + +static char wbuf[2*OBS]; +static char *wbp = wbuf; + +/* + * 1 for tokens that don't need whitespace when they get inserted + * by macro expansion + */ +static const char wstab[] = { + 0, /* END */ + 0, /* UNCLASS */ + 0, /* NAME */ + 0, /* NUMBER */ + 0, /* STRING */ + 0, /* CCON */ + 1, /* NL */ + 0, /* WS */ + 0, /* DSHARP */ + 0, /* EQ */ + 0, /* NEQ */ + 0, /* LEQ */ + 0, /* GEQ */ + 0, /* LSH */ + 0, /* RSH */ + 0, /* LAND */ + 0, /* LOR */ + 0, /* PPLUS */ + 0, /* MMINUS */ + 0, /* ARROW */ + 1, /* SBRA */ + 1, /* SKET */ + 1, /* LP */ + 1, /* RP */ + 0, /* DOT */ + 0, /* AND */ + 0, /* STAR */ + 0, /* PLUS */ + 0, /* MINUS */ + 0, /* TILDE */ + 0, /* NOT */ + 0, /* SLASH */ + 0, /* PCT */ + 0, /* LT */ + 0, /* GT */ + 0, /* CIRC */ + 0, /* OR */ + 0, /* QUEST */ + 0, /* COLON */ + 0, /* ASGN */ + 1, /* COMMA */ + 0, /* SHARP */ + 1, /* SEMIC */ + 1, /* CBRA */ + 1, /* CKET */ + 0, /* ASPLUS */ + 0, /* ASMINUS */ + 0, /* ASSTAR */ + 0, /* ASSLASH */ + 0, /* ASPCT */ + 0, /* ASCIRC */ + 0, /* ASLSH */ + 0, /* ASRSH */ + 0, /* ASOR */ + 0, /* ASAND */ + 0, /* ELLIPS */ + 0, /* DSHARP1 */ + 0, /* NAME1 */ + 0, /* DEFINED */ + 0, /* UMINUS */ +}; + +void +maketokenrow(int size, Tokenrow *trp) +{ + trp->max = size; + if (size>0) + trp->bp = (Token *)domalloc(size*sizeof(Token)); + else + trp->bp = NULL; + trp->tp = trp->bp; + trp->lp = trp->bp; +} + +Token * +growtokenrow(Tokenrow *trp) +{ + int ncur = trp->tp - trp->bp; + int nlast = trp->lp - trp->bp; + + trp->max = 3*trp->max/2 + 1; + trp->bp = (Token *)realloc(trp->bp, trp->max*sizeof(Token)); + if (trp->bp == NULL) + error(FATAL, "Out of memory from realloc"); + trp->lp = &trp->bp[nlast]; + trp->tp = &trp->bp[ncur]; + return trp->lp; +} + +/* + * Compare a row of tokens, ignoring the content of WS; return !=0 if different + */ +int +comparetokens(Tokenrow *tr1, Tokenrow *tr2) +{ + Token *tp1, *tp2; + + tp1 = tr1->tp; + tp2 = tr2->tp; + if (tr1->lp-tp1 != tr2->lp-tp2) + return 1; + for (; tp1<tr1->lp ; tp1++, tp2++) { + if (tp1->type != tp2->type + || (tp1->wslen==0) != (tp2->wslen==0) + || tp1->len != tp2->len + || strncmp((char*)tp1->t, (char*)tp2->t, tp1->len)!=0) + return 1; + } + return 0; +} + +/* + * replace ntok tokens starting at dtr->tp with the contents of str. + * tp ends up pointing just beyond the replacement. + * Canonical whitespace is assured on each side. + */ +void +insertrow(Tokenrow *dtr, int ntok, Tokenrow *str) +{ + int nrtok = rowlen(str); + + dtr->tp += ntok; + adjustrow(dtr, nrtok-ntok); + dtr->tp -= ntok; + movetokenrow(dtr, str); + makespace(dtr); + dtr->tp += nrtok; + makespace(dtr); +} + +/* + * make sure there is WS before trp->tp, if tokens might merge in the output + */ +void +makespace(Tokenrow *trp) +{ + uchar *tt; + Token *tp = trp->tp; + + if (tp >= trp->lp) + return; + if (tp->wslen) { + if (tp->flag&XPWS + && (wstab[tp->type] || (trp->tp>trp->bp && wstab[(tp-1)->type]))) { + tp->wslen = 0; + return; + } + tp->t[-1] = ' '; + return; + } + if (wstab[tp->type] || (trp->tp>trp->bp && wstab[(tp-1)->type])) + return; + tt = newstring(tp->t, tp->len, 1); + *tt++ = ' '; + tp->t = tt; + tp->wslen = 1; + tp->flag |= XPWS; +} + +/* + * Copy an entire tokenrow into another, at tp. + * It is assumed that there is enough space. + * Not strictly conforming. + */ +void +movetokenrow(Tokenrow *dtr, Tokenrow *str) +{ + int nby; + + /* nby = sizeof(Token) * (str->lp - str->bp); */ + nby = (char *)str->lp - (char *)str->bp; + memmove(dtr->tp, str->bp, nby); +} + +/* + * Move the tokens in a row, starting at tr->tp, rightward by nt tokens; + * nt may be negative (left move). + * The row may need to be grown. + * Non-strictly conforming because of the (char *), but easily fixed + */ +void +adjustrow(Tokenrow *trp, int nt) +{ + int nby, size; + + if (nt==0) + return; + size = (trp->lp - trp->bp) + nt; + while (size > trp->max) + growtokenrow(trp); + /* nby = sizeof(Token) * (trp->lp - trp->tp); */ + nby = (char *)trp->lp - (char *)trp->tp; + if (nby) + memmove(trp->tp+nt, trp->tp, nby); + trp->lp += nt; +} + +/* + * Copy a row of tokens into the destination holder, allocating + * the space for the contents. Return the destination. + */ +Tokenrow * +copytokenrow(Tokenrow *dtr, Tokenrow *str) +{ + int len = rowlen(str); + + maketokenrow(len, dtr); + movetokenrow(dtr, str); + dtr->lp += len; + return dtr; +} + +/* + * Produce a copy of a row of tokens. Start at trp->tp. + * The value strings are copied as well. The first token + * has WS available. + */ +Tokenrow * +normtokenrow(Tokenrow *trp) +{ + Token *tp; + Tokenrow *ntrp = new(Tokenrow); + int len; + + len = trp->lp - trp->tp; + if (len<=0) + len = 1; + maketokenrow(len, ntrp); + for (tp=trp->tp; tp < trp->lp; tp++) { + *ntrp->lp = *tp; + if (tp->len) { + ntrp->lp->t = newstring(tp->t, tp->len, 1); + *ntrp->lp->t++ = ' '; + if (tp->wslen) + ntrp->lp->wslen = 1; + } + ntrp->lp++; + } + if (ntrp->lp > ntrp->bp) + ntrp->bp->wslen = 0; + return ntrp; +} + +/* + * Debugging + */ +void +peektokens(Tokenrow *trp, char *str) +{ + Token *tp; + + tp = trp->tp; + flushout(); + if (str) + fprintf(stderr, "%s ", str); + if (tp<trp->bp || tp>trp->lp) + fprintf(stderr, "(tp offset %d) ", tp-trp->bp); + for (tp=trp->bp; tp<trp->lp && tp<trp->bp+32; tp++) { + if (tp->type!=NL) { + int c = tp->t[tp->len]; + tp->t[tp->len] = 0; + fprintf(stderr, "%s", tp->t); + tp->t[tp->len] = c; + } + if (tp->type==NAME) { + fprintf(stderr, tp==trp->tp?"{*":"{"); + prhideset(tp->hideset); + fprintf(stderr, "} "); + } else + fprintf(stderr, tp==trp->tp?"{%x*} ":"{%x} ", tp->type); + } + fprintf(stderr, "\n"); + fflush(stderr); +} + +void +puttokens(Tokenrow *trp) +{ + Token *tp; + int len; + uchar *p; + + if (verbose) + peektokens(trp, ""); + tp = trp->bp; + for (; tp<trp->lp; tp++) { + len = tp->len+tp->wslen; + p = tp->t-tp->wslen; + while (tp<trp->lp-1 && p+len == (tp+1)->t - (tp+1)->wslen) { + tp++; + len += tp->wslen+tp->len; + } + if (len>OBS/2) { /* handle giant token */ + if (wbp > wbuf) + write(1, wbuf, wbp-wbuf); + write(1, (char *)p, len); + wbp = wbuf; + } else { + memcpy(wbp, p, len); + wbp += len; + } + if (wbp >= &wbuf[OBS]) { + write(1, wbuf, OBS); + if (wbp > &wbuf[OBS]) + memcpy(wbuf, wbuf+OBS, wbp - &wbuf[OBS]); + wbp -= OBS; + } + } + trp->tp = tp; + if (cursource->fd==0) + flushout(); +} + +void +flushout(void) +{ + if (wbp>wbuf) { + write(1, wbuf, wbp-wbuf); + wbp = wbuf; + } +} + +/* + * turn a row into just a newline + */ +void +setempty(Tokenrow *trp) +{ + trp->tp = trp->bp; + trp->lp = trp->bp+1; + *trp->bp = nltoken; +} + +/* + * generate a number + */ +char * +outnum(char *p, int n) +{ + if (n>=10) + p = outnum(p, n/10); + *p++ = n%10 + '0'; + return p; +} + +/* + * allocate and initialize a new string from s, of length l, at offset o + * Null terminated. + */ +uchar * +newstring(uchar *s, int l, int o) +{ + uchar *ns = (uchar *)domalloc(l+o+1); + + ns[l+o] = '\0'; + return (uchar*)strncpy((char*)ns+o, (char*)s, l) - o; +} diff --git a/src/tools/lcc/cpp/unix.c b/src/tools/lcc/cpp/unix.c new file mode 100644 index 0000000..b7cf3af --- /dev/null +++ b/src/tools/lcc/cpp/unix.c @@ -0,0 +1,128 @@ +#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include "cpp.h"
+
+extern int lcc_getopt(int, char *const *, const char *);
+extern char *optarg, rcsid[];
+extern int optind;
+int verbose;
+int Mflag; /* only print active include files */
+char *objname; /* "src.$O: " */
+int Cplusplus = 1;
+
+void
+setup(int argc, char **argv)
+{
+ int c, fd, i;
+ char *fp, *dp;
+ Tokenrow tr;
+ extern void setup_kwtab(void);
+ uchar *includeDirs[ NINCLUDE ] = { 0 };
+ int numIncludeDirs = 0;
+
+ setup_kwtab();
+ while ((c = lcc_getopt(argc, argv, "MNOVv+I:D:U:F:lg")) != -1)
+ switch (c) {
+ case 'N':
+ for (i=0; i<NINCLUDE; i++)
+ if (includelist[i].always==1)
+ includelist[i].deleted = 1;
+ break;
+ case 'I':
+ includeDirs[ numIncludeDirs++ ] = newstring( (uchar *)optarg, strlen( optarg ), 0 );
+ break;
+ case 'D':
+ case 'U':
+ setsource("<cmdarg>", -1, optarg);
+ maketokenrow(3, &tr);
+ gettokens(&tr, 1);
+ doadefine(&tr, c);
+ unsetsource();
+ break;
+ case 'M':
+ Mflag++;
+ break;
+ case 'v':
+ fprintf(stderr, "%s %s\n", argv[0], rcsid);
+ break;
+ case 'V':
+ verbose++;
+ break;
+ case '+':
+ Cplusplus++;
+ break;
+ default:
+ break;
+ }
+ dp = ".";
+ fp = "<stdin>";
+ fd = 0;
+ if (optind<argc) {
+ dp = basepath( argv[optind] );
+ fp = (char*)newstring((uchar*)argv[optind], strlen(argv[optind]), 0);
+ if ((fd = open(fp, 0)) <= 0)
+ error(FATAL, "Can't open input file %s", fp);
+ }
+ if (optind+1<argc) {
+ int fdo = creat(argv[optind+1], 0666);
+ if (fdo<0)
+ error(FATAL, "Can't open output file %s", argv[optind+1]);
+ dup2(fdo, 1);
+ }
+ if(Mflag)
+ setobjname(fp);
+ includelist[NINCLUDE-1].always = 0;
+ includelist[NINCLUDE-1].file = dp;
+
+ for( i = 0; i < numIncludeDirs; i++ )
+ appendDirToIncludeList( (char *)includeDirs[ i ] );
+
+ setsource(fp, fd, NULL);
+}
+
+
+char *basepath( char *fname )
+{
+ char *dp = ".";
+ char *p;
+ if ((p = strrchr(fname, '/')) != NULL) {
+ int dlen = p - fname;
+ dp = (char*)newstring((uchar*)fname, dlen+1, 0);
+ dp[dlen] = '\0';
+ }
+
+ return dp;
+}
+
+/* memmove is defined here because some vendors don't provide it at
+ all and others do a terrible job (like calling malloc) */
+// -- ouch, that hurts -- ln
+#ifndef MACOS_X /* always use the system memmove() on Mac OS X. --ryan. */
+#ifdef memmove
+#undef memmove
+#endif
+void *
+memmove(void *dp, const void *sp, size_t n)
+{
+ unsigned char *cdp, *csp;
+
+ if (n<=0)
+ return 0;
+ cdp = dp;
+ csp = (unsigned char *)sp;
+ if (cdp < csp) {
+ do {
+ *cdp++ = *csp++;
+ } while (--n);
+ } else {
+ cdp += n;
+ csp += n;
+ do {
+ *--cdp = *--csp;
+ } while (--n);
+ }
+ return 0;
+}
+#endif
|