aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorQuentin Carbonneaux <[email protected]>2015-10-02 15:34:38 -0400
committerQuentin Carbonneaux <[email protected]>2015-10-02 15:34:38 -0400
commit30eced928c404caae41d62f238270f00bdf7c25b (patch)
tree6f305c2d318e4a9b2dbec9f5a38a704a090c96c1
parentecaad8119f77601fec4cab61c510d697ec5bb939 (diff)
start an example compiler for a subset of C
-rw-r--r--minic/LICENSE21
-rw-r--r--minic/Makefile8
-rw-r--r--minic/minic.y629
-rw-r--r--minic/test.minic9
-rw-r--r--minic/yacc.c1365
5 files changed, 2032 insertions, 0 deletions
diff --git a/minic/LICENSE b/minic/LICENSE
new file mode 100644
index 0000000..7866b82
--- /dev/null
+++ b/minic/LICENSE
@@ -0,0 +1,21 @@
+MIT/X Consortium License
+
+© 2015 Quentin Carbonneaux
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/minic/Makefile b/minic/Makefile
new file mode 100644
index 0000000..39b2946
--- /dev/null
+++ b/minic/Makefile
@@ -0,0 +1,8 @@
+CFLAGS=-g -Wall
+
+minic: yacc minic.y
+ ./yacc minic.y
+ $(CC) $(CFLAGS) -o $@ y.tab.c
+clean:
+ rm -f yacc minic y.*
+.PHONY: clean
diff --git a/minic/minic.y b/minic/minic.y
new file mode 100644
index 0000000..00351e1
--- /dev/null
+++ b/minic/minic.y
@@ -0,0 +1,629 @@
+%{
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+enum {
+ NString = 16,
+ NVar = 256,
+};
+
+
+enum { /* minic types */
+ INT = 0,
+ LNG = 1,
+ PTR = 2,
+};
+
+#define IDIR(x) (((x) << 2) + PTR)
+#define DREF(x) ((x) >> 2)
+#define KIND(x) ((x) & 3)
+#define SIZE(x) (KIND(x) == INT ? 4 : 8)
+
+typedef struct Node Node;
+typedef struct Symb Symb;
+typedef struct Stmt Stmt;
+
+struct Node {
+ char op;
+ union {
+ int n;
+ char v[NString];
+ } u;
+ Node *l, *r;
+};
+
+struct Symb {
+ enum {
+ Con,
+ Tmp,
+ Var,
+ } t;
+ union {
+ int n;
+ char v[NString];
+ } u;
+ unsigned long ctyp;
+};
+
+struct Stmt {
+ enum {
+ If,
+ While,
+ Seq,
+ Expr,
+ } t;
+ void *p1, *p2, *p3;
+};
+
+int yylex(void), yyerror(char *);
+Symb expr(Node *), lval(Node *);
+
+FILE *of;
+int lbl, tmp;
+struct {
+ char v[NString];
+ unsigned ctyp;
+} varh[NVar];
+
+void
+die(char *s)
+{
+ fprintf(stderr, "error: %s\n", s);
+ exit(1);
+}
+
+void *
+alloc(size_t s)
+{
+ void *p;
+
+ p = malloc(s);
+ if (!p)
+ die("out of memory");
+ return p;
+}
+
+unsigned
+hash(char *s)
+{
+ unsigned h;
+
+ h = 42;
+ while (*s)
+ h += 11 * h + *s++;
+ return h % NVar;
+}
+
+void
+varclr()
+{
+ unsigned h;
+
+ for (h=0; h<NVar; h++)
+ varh[h].v[0] = 0;
+}
+
+void
+varadd(char *v, unsigned ctyp)
+{
+ unsigned h0, h;
+
+ h0 = hash(v);
+ h = h0;
+ do {
+ if (varh[h].v[0] == 0) {
+ strcpy(varh[h].v, v);
+ varh[h].ctyp = ctyp;
+ return;
+ }
+ if (strcmp(varh[h].v, v) == 0)
+ die("double definition");
+ } while(++h != h0);
+ die("too many variables");
+}
+
+unsigned
+varctyp(char *v)
+{
+ unsigned h0, h;
+
+ h0 = hash(v);
+ h = h0;
+ do {
+ if (strcmp(varh[h].v, v) == 0)
+ return varh[h].ctyp;
+ } while (++h != h0 && varh[h].v[0] != 0);
+ die("undeclared variable");
+ return -1;
+}
+
+char
+irtyp(unsigned ctyp)
+{
+ switch (KIND(ctyp)) {
+ default:
+ die("internal error");
+ case INT:
+ return 'w';
+ case PTR:
+ case LNG:
+ return 'l';
+ }
+}
+
+void
+psymb(Symb s)
+{
+ switch (s.t) {
+ case Tmp:
+ fprintf(of, "%%t%d", s.u.n);
+ break;
+ case Var:
+ fprintf(of, "%%%s", s.u.v);
+ break;
+ case Con:
+ fprintf(of, "%d", s.u.n);
+ break;
+ }
+}
+
+void
+sext(Symb *s)
+{
+ fprintf(of, "\t%%t%d =l sext ", tmp);
+ psymb(*s);
+ fprintf(of, "\n");
+ s->t = Tmp;
+ s->ctyp = LNG;
+ s->u.n = tmp++;
+}
+
+unsigned
+prom(int op, Symb *l, Symb *r)
+{
+ Symb *t;
+ int sz;
+
+ if (l->ctyp == r->ctyp && KIND(l->ctyp) != PTR)
+ return l->ctyp;
+
+ if (l->ctyp == LNG && r->ctyp == INT) {
+ sext(r);
+ return LNG;
+ }
+ if (l->ctyp == INT && r->ctyp == LNG) {
+ sext(l);
+ return LNG;
+ }
+
+ if (op == '+') {
+ if (KIND(r->ctyp) == PTR) {
+ t = l;
+ l = r;
+ r = t;
+ }
+ if (KIND(r->ctyp) == PTR)
+ die("pointers added");
+ goto Scale;
+ }
+
+ if (op == '-') {
+ if (KIND(l->ctyp) != PTR)
+ die("pointer substracted from integer");
+ if (KIND(r->ctyp) != PTR)
+ goto Scale;
+ if (l->ctyp != r->ctyp)
+ die("non-homogeneous pointers in substraction");
+ return LNG;
+ }
+
+Scale:
+ if (irtyp(r->ctyp) != 'l')
+ sext(r);
+ sz = SIZE(DREF(l->ctyp));
+ fprintf(of, "\t%%t%d =l mul %d, ", sz, tmp);
+ psymb(*r);
+ fprintf(of, "\n");
+ r->u.n = tmp++;
+ return l->ctyp;
+}
+
+Symb
+expr(Node *n)
+{
+ static char *otoa[] = {
+ ['+'] = "add",
+ ['-'] = "sub",
+ ['*'] = "mul",
+ ['/'] = "div",
+ ['%'] = "rem",
+ ['<'] = "cslt", /* meeeeh, careful with pointers */
+ ['l'] = "csle",
+ ['e'] = "cseq",
+ ['n'] = "csne",
+ };
+ Symb sr, s0, s1;
+
+ sr.t = Tmp;
+ sr.u.n = tmp++;
+
+ switch (n->op) {
+
+ case 'V':
+ sr.ctyp = varctyp(n->u.v);
+ fprintf(of, "\t");
+ psymb(sr);
+ fprintf(of, " =%c ", irtyp(sr.ctyp));
+ fprintf(of, "load %%%s\n", n->u.v);
+ break;
+
+ case 'N':
+ sr.t = Con;
+ sr.u.n = n->u.n;
+ sr.ctyp = INT;
+ break;
+
+ case '@':
+ s0 = expr(n->l);
+ if (KIND(s0.ctyp) != PTR)
+ die("dereference of a non-pointer");
+ sr.ctyp = DREF(s0.ctyp);
+ fprintf(of, "\t");
+ psymb(sr);
+ fprintf(of, " =%c load ", irtyp(sr.ctyp));
+ psymb(s0);
+ fprintf(of, "\n");
+ break;
+
+ case '&':
+ sr = lval(n->l);
+ sr.ctyp = IDIR(sr.ctyp);
+ break;
+
+ case '=':
+ s0 = expr(n->r);
+ s1 = lval(n->l);
+ sr = s0;
+ if (s1.ctyp == LNG && s0.ctyp == INT)
+ sext(&s0);
+ if (s1.ctyp != s0.ctyp)
+ die("invalid assignment");
+ fprintf(of, "\tstore%c ", irtyp(s1.ctyp));
+ goto Args;
+
+ case 'P':
+ case 'M':
+ die("unimplemented ++ and --");
+ break;
+
+ default:
+ s0 = expr(n->l);
+ s1 = expr(n->r);
+ sr.ctyp = prom(n->op, &s0, &s1);
+ if (strchr("ne<l", n->op))
+ sr.ctyp = INT;
+ fprintf(of, "\t");
+ psymb(sr);
+ fprintf(of, " =%c", irtyp(sr.ctyp));
+ fprintf(of, " %s ", otoa[(int)n->op]);
+ Args:
+ psymb(s0);
+ fprintf(of, ", ");
+ psymb(s1);
+ fprintf(of, "\n");
+ break;
+
+ }
+ if (n->op == '-'
+ && KIND(s0.ctyp) == PTR
+ && KIND(s1.ctyp) == PTR) {
+ fprintf(of, "\t%%t%d =l div ", tmp);
+ psymb(sr);
+ fprintf(of, ", %d\n", SIZE(DREF(s0.ctyp)));
+ sr.u.n = tmp++;
+ }
+ return sr;
+}
+
+Symb
+lval(Node *n)
+{
+ Symb sr;
+
+ switch (n->op) {
+ default:
+ die("invalid lvalue");
+ case 'V':
+ sr.t = Var;
+ sr.ctyp = varctyp(n->u.v);
+ strcpy(sr.u.v, n->u.v);
+ break;
+ case '@':
+ sr = expr(n->l);
+ if (KIND(sr.ctyp) != PTR)
+ die("dereference of a non-pointer");
+ sr.ctyp = DREF(sr.ctyp);
+ break;
+ }
+ return sr;
+}
+
+void
+stmt(Stmt *s)
+{
+ int l;
+ Symb x;
+Again:
+ if (!s)
+ return;
+
+ switch (s->t) {
+ case Expr:
+ expr(s->p1);
+ break;
+ case Seq:
+ stmt(s->p1);
+ s = s->p2;
+ goto Again;
+ case If:
+ x = expr(s->p1);
+ fprintf(of, "\tjnz "); /* to be clean, a comparison to 0 should be inserted here */
+ psymb(x);
+ l = lbl;
+ lbl += 3;
+ fprintf(of, ", @l%d, @l%d\n", l, l+1);
+ fprintf(of, "@l%d\n", l);
+ stmt(s->p2);
+ if (s->p3)
+ fprintf(of, "\tjmp @l%d\n", l+2);
+ fprintf(of, "@l%d\n", l+1);
+ if (s->p3) {
+ stmt(s->p3);
+ fprintf(of, "@l%d\n", l+2);
+ }
+ break;
+ case While:
+ x = expr(s->p1);
+ l = lbl;
+ lbl += 3;
+ fprintf(of, "@l%d\n", l);
+ fprintf(of, "\tjnz "); /* ditto */
+ psymb(x);
+ fprintf(of, ", @l%d, @l%d\n", l+1, l+2);
+ fprintf(of, "@l%d\n", l+1);
+ stmt(s->p2);
+ fprintf(of, "\tjmp @l%d\n", l);
+ fprintf(of, "@l%d\n", l+2);
+ break;
+ }
+}
+
+Node *
+mknode(char op, Node *l, Node *r)
+{
+ Node *n;
+
+ n = alloc(sizeof *n);
+ n->op = op;
+ n->l = l;
+ n->r = r;
+ return n;
+}
+
+Node *
+mkidx(Node *a, Node *i)
+{
+ Node *n;
+
+ n = mknode('+', a, i);
+ n = mknode('@', n, 0);
+ return n;
+}
+
+Stmt *
+mkstmt(int t, void *p1, void *p2, void *p3)
+{
+ Stmt *s;
+
+ s = alloc(sizeof *s);
+ s->t = t;
+ s->p1 = p1;
+ s->p2 = p2;
+ s->p3 = p3;
+ return s;
+}
+
+%}
+
+%union {
+ Node *n;
+ Stmt *s;
+ unsigned u;
+}
+
+%token <n> NUM
+%token <n> IDENT
+%token PP MM LE GE
+
+%token TINT TLNG
+%token IF ELSE WHILE
+
+%right '='
+%left '+' '-'
+%left '*' '/' '%'
+%nonassoc '&'
+%left EQ NE
+%left '<' '>' LE GE
+%nonassoc PP MM
+%nonassoc '['
+
+%type <u> type
+%type <s> stmt stmts
+%type <n> expr
+
+%%
+
+prog: init '{' dcls stmts '}'
+{
+ stmt($4);
+ fprintf(of, "\tret\n");
+};
+
+init:
+{
+ varclr();
+ lbl = 0;
+ tmp = 0;
+ fprintf(of, "@l%d\n", lbl++);
+};
+
+dcls: | dcls type IDENT ';'
+{
+ int s;
+ char *v;
+
+ v = $3->u.v;
+ s = SIZE($2);
+ varadd(v, $2);
+ fprintf(of, "\t%%%s =l alloc%d %d\n", v, s, s);
+};
+
+type: type '*' { $$ = IDIR($1); }
+ | TINT { $$ = INT; }
+ | TLNG { $$ = LNG; }
+ ;
+
+stmt: ';' { $$ = 0; }
+ | '{' stmts '}' { $$ = $2; }
+ | expr ';' { $$ = mkstmt(Expr, $1, 0, 0); }
+ | WHILE '(' expr ')' stmt { $$ = mkstmt(While, $3, $5, 0); }
+ | IF '(' expr ')' stmt ELSE stmt { $$ = mkstmt(If, $3, $5, $7); }
+ | IF '(' expr ')' stmt { $$ = mkstmt(If, $3, $5, 0); }
+ ;
+
+stmts: stmts stmt { $$ = mkstmt(Seq, $1, $2, 0); }
+ | { $$ = 0; }
+ ;
+
+expr: NUM
+ | IDENT
+ | expr '[' expr ']' { $$ = mkidx($1, $3); }
+ | '(' expr ')' { $$ = $2; }
+ | '*' expr { $$ = mknode('@', $2, 0); }
+ | '&' expr { $$ = mknode('&', $2, 0); }
+ | expr '=' expr { $$ = mknode('=', $1, $3); }
+ | expr '+' expr { $$ = mknode('+', $1, $3); }
+ | expr '-' expr { $$ = mknode('-', $1, $3); }
+ | expr '*' expr { $$ = mknode('*', $1, $3); }
+ | expr '/' expr { $$ = mknode('/', $1, $3); }
+ | expr '%' expr { $$ = mknode('%', $1, $3); }
+ | expr '<' expr { $$ = mknode('<', $1, $3); }
+ | expr '>' expr { $$ = mknode('<', $3, $1); }
+ | expr LE expr { $$ = mknode('l', $1, $3); }
+ | expr GE expr { $$ = mknode('l', $3, $1); }
+ | expr EQ expr { $$ = mknode('e', $1, $3); }
+ | expr NE expr { $$ = mknode('n', $1, $3); }
+ | expr PP { $$ = mknode('P', $1, 0); }
+ | expr MM { $$ = mknode('M', $1, 0); }
+ ;
+
+%%
+
+int
+yylex()
+{
+ struct {
+ char *s;
+ int t;
+ } kwds[] = {
+ { "int", TINT },
+ { "long", TLNG },
+ { "if", IF },
+ { "else", ELSE },
+ { "while", WHILE },
+ { 0, 0 }
+ };
+ int i, c, c1, n, sgn;
+ char v[NString], *p;
+
+ do
+ c = getchar();
+ while (isspace(c));
+
+ if (c == EOF)
+ return 0;
+
+ if (isdigit(c) || c == '-') {
+ n = 0;
+ sgn = 1;
+ if (c == '-') {
+ sgn = -1;
+ c = getchar();
+ if (!isdigit(c)) {
+ ungetc(c, stdin);
+ return '-';
+ }
+ }
+ do {
+ n *= 10;
+ n += c-'0';
+ c = getchar();
+ } while (isdigit(c));
+ ungetc(c, stdin);
+ yylval.n = mknode('N', 0, 0);
+ yylval.n->u.n = n * sgn;
+ return NUM;
+ }
+
+ if (isalpha(c)) {
+ p = v;
+ do {
+ if (p == &v[NString-1])
+ die("ident too long");
+ *p++ = c;
+ c = getchar();
+ } while (isalpha(c));
+ *p = 0;
+ ungetc(c, stdin);
+ for (i=0; kwds[i].s; i++)
+ if (strcmp(v, kwds[i].s) == 0)
+ return kwds[i].t;
+ yylval.n = mknode('V', 0, 0);
+ strcpy(yylval.n->u.v, v);
+ return IDENT;
+ }
+
+ c1 = getchar();
+#define DI(a, b) a + b*256
+ switch (DI(c,c1)) {
+ case DI('!','='): return NE;
+ case DI('=','='): return EQ;
+ case DI('<','='): return LE;
+ case DI('>','='): return GE;
+ case DI('+','+'): return PP;
+ case DI('-','-'): return MM;
+ }
+#undef DI
+ ungetc(c1, stdin);
+
+ return c;
+}
+
+int
+yyerror(char *err)
+{
+ die("parse error");
+ return 0;
+}
+
+int
+main()
+{
+ of = stdout;
+ if (yyparse() != 0)
+ die("parse error");
+ return 0;
+}
diff --git a/minic/test.minic b/minic/test.minic
new file mode 100644
index 0000000..8d30eb1
--- /dev/null
+++ b/minic/test.minic
@@ -0,0 +1,9 @@
+{
+ int x;
+ int *p;
+
+ x = 0;
+ p = &x;
+ while (x < 10)
+ *p = x + 1;
+}
diff --git a/minic/yacc.c b/minic/yacc.c
new file mode 100644
index 0000000..52fd891
--- /dev/null
+++ b/minic/yacc.c
@@ -0,0 +1,1365 @@
+/*% clang -g -Wall -Wextra % -o #
+ * miniyacc - LALR(1) grammars for C
+ * See LICENSE for copyright and license details.
+ */
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef int Sym;
+typedef struct Rule Rule;
+typedef struct TSet TSet;
+typedef struct Info Info;
+typedef struct Term Term;
+typedef struct Item Item;
+typedef struct Row Row;
+
+#define S ((Sym) -1)
+#define Red(n) (- (n+2)) /* involutive, Red(Red(x)) == x */
+#define GetBit(s,n) (s[n/32] & (1<<(n%32)))
+#define SetBit(s,n) (s[n/32] |= 1<<(n%32))
+
+enum {
+ IdntSz = 64,
+ MaxRhs = 32,
+ MaxTk = 500,
+ MaxNt = 500,
+ MaxRl = 800,
+ MaxTm = 1000,
+
+ TSetSz = (MaxTk+31)/32,
+ Sym0 = MaxTk
+};
+
+struct Rule {
+ Sym lhs;
+ Sym rhs[MaxRhs];
+ char *act;
+ int actln;
+ int prec;
+};
+
+struct TSet {
+ unsigned t[TSetSz];
+};
+
+struct Info {
+ int nul;
+ TSet fst;
+ int prec;
+ enum {
+ ANone,
+ ALeft,
+ ARight,
+ ANonassoc
+ } assoc;
+ char name[IdntSz];
+ char type[IdntSz];
+};
+
+struct Term {
+ Rule *rule;
+ int dot;
+ TSet lk;
+};
+
+struct Item {
+ int id;
+ int nt;
+ Term ts[MaxTm];
+ Item **gtbl;
+ int dirty;
+};
+
+struct Row {
+ int def;
+ int ndef;
+ int *t;
+};
+
+char srs[] = "shift/reduce conflict state %d token %s\n";
+char rrs[] = "reduce/reduce conflict state %d token %s\n";
+
+Item i0; /* temporary item */
+
+int nrl, nsy, nst, ntk;
+Rule rs[MaxRl]; /* grammar rules (ordered, rcmp) */
+Info is[MaxTk+MaxNt]; /* symbol information */
+Item **st; /* LALR(1) states (ordered, icmp) */
+Row *as; /* action table [state][tok] */
+Row *gs; /* goto table [sym][state] */
+Sym sstart;/* start symbol */
+Item *ini; /* initial state */
+int doty; /* type-checking enabled */
+
+int srconf, rrconf;
+int actsz;
+int *act;
+int *chk;
+int *adsp;
+int *gdsp;
+
+int lineno = 1;
+char *srca;
+FILE *fin;
+FILE *fout;
+FILE *fgrm;
+FILE *fhdr;
+
+void
+die(char *s)
+{
+ fprintf(stderr, "%s (on line %d)\n", s, lineno);
+ exit(1);
+}
+
+void *
+yalloc(size_t n, size_t o)
+{
+ void *p;
+
+ p = calloc(n, o);
+ if (!p)
+ die("out of memory");
+ return p;
+}
+
+int
+rcmp(const void *a, const void *b)
+{
+ return ((Rule *)a)->lhs - ((Rule *)b)->lhs;
+}
+
+Rule *
+rfind(Sym lhs)
+{
+ Rule *r;
+ Rule k;
+
+ k.lhs = lhs;
+ r = bsearch(&k, rs, nrl, sizeof *r, rcmp);
+ if (r != 0)
+ while (r > rs && r[-1].lhs == lhs)
+ r--;
+ return r;
+}
+
+int
+slen(Sym *l)
+{
+ int n;
+
+ for (n=0; *l!=S; n++, l++);
+ return n;
+}
+
+void
+tszero(TSet *ts)
+{
+ memset(ts, 0, sizeof *ts);
+}
+
+int
+tsunion(TSet *tsa, TSet *tsb)
+{
+ int n;
+ unsigned *a, *b, c, t;
+
+ c = 0;
+ a = tsa->t;
+ b = tsb->t;
+ n = (31+ntk)/32;
+ while (n-- > 0) {
+ t = *a;
+ *a |= *b++;
+ c |= t ^ *a++;
+ }
+ return !!c;
+}
+
+void
+first(TSet *ts, Sym *stnc, TSet *last)
+{
+ Sym f;
+
+ f = stnc[0];
+ if (f == S) {
+ if (last)
+ tsunion(ts, last);
+ return;
+ }
+ if (f < ntk) {
+ SetBit(ts->t, f);
+ return;
+ }
+ if (is[f].nul)
+ first(ts, stnc+1, last);
+ tsunion(ts, &is[f].fst);
+}
+
+void
+ginit()
+{
+ int chg;
+ Rule *r;
+ Info *i;
+ Sym *s;
+ TSet ts;
+
+ do {
+ chg = 0;
+ for (r=rs; r-rs<nrl; r++) {
+ i = &is[r->lhs];
+ for (s=r->rhs; *s!=S; s++)
+ if (!is[*s].nul)
+ goto nonul;
+ chg |= i->nul == 0;
+ i->nul = 1;
+ nonul:
+ tszero(&ts);
+ first(&ts, r->rhs, 0);
+ chg |= tsunion(&i->fst, &ts);
+ }
+ } while (chg);
+}
+
+int
+tcmp(Term *a, Term *b)
+{
+ int c;
+
+ c = a->rule - b->rule;
+ if (c==0)
+ c = a->dot - b->dot;
+ return c;
+}
+
+int
+tcmpv(const void *a, const void *b)
+{
+ return tcmp((Term *)a, (Term *)b);
+}
+
+void
+iclose(Item *i)
+{
+ int smap[MaxNt];
+ Rule *r;
+ Term *t, t1;
+ Sym s, *rem;
+ int chg, n, m;
+
+ t1.dot = 0;
+ memset(smap, 0, sizeof smap);
+ for (n=0; n<i->nt; n++) {
+ t = &i->ts[n];
+ s = t->rule->lhs-Sym0;
+ if (t->dot==0)
+ if (smap[s]==0)
+ smap[s] = n;
+ }
+ do {
+ chg = 0;
+ for (n=0; n<i->nt; n++) {
+ t = &i->ts[n];
+ rem = &t->rule->rhs[t->dot];
+ s = *rem++;
+ if (s < Sym0 || s == S)
+ continue;
+ r = rfind(s);
+ if (!r)
+ die("some non-terminals are not defined");
+ tszero(&t1.lk);
+ first(&t1.lk, rem, &t->lk);
+ m = smap[s-Sym0];
+ if (m)
+ for (; r-rs<nrl && r->lhs==s; r++, m++)
+ chg |= tsunion(&i->ts[m].lk, &t1.lk);
+ else {
+ m = i->nt;
+ smap[s-Sym0] = m;
+ for (; r-rs<nrl && r->lhs==s; r++, m++) {
+ if (m>=MaxTm)
+ die("too many terms in item");
+ t1.rule = r;
+ i->ts[m] = t1;
+ }
+ i->nt = m;
+ chg = 1;
+ }
+ }
+ } while (chg);
+}
+
+void
+igoto(Item *i, Sym s)
+{
+ Term *t, *t1;
+ int n;
+
+ i0.nt = 0;
+ for (n=0, t=i->ts; n<i->nt; n++, t++) {
+ if (t->rule->rhs[t->dot] != s)
+ continue;
+ t1 = &i0.ts[i0.nt++];
+ *t1 = *t;
+ t1->dot++;
+ }
+ qsort(i0.ts, i0.nt, sizeof i0.ts[0], tcmpv);
+}
+
+int
+icmp(Item *a, Item *b)
+{
+ Term *ta, *tb, *ma, *mb;
+ int c;
+
+ ta = a->ts;
+ tb = b->ts;
+ ma = ta+a->nt;
+ mb = tb+b->nt;
+ for (;;) {
+ if (ta==ma || ta->dot==0)
+ return -(tb<mb && tb->dot);
+ if (tb==mb || tb->dot==0)
+ return +(ta<ma && ta->dot);
+ if ((c=tcmp(ta++, tb++)))
+ return c;
+ }
+}
+
+int
+stadd(Item **pi)
+{
+ Item *i, *i1;
+ int lo, hi, mid, n, chg;
+
+ /* http://www.iq0.com/duffgram/bsearch.c */
+ i = *pi;
+ lo = 0;
+ hi = nst - 1;
+ if (hi<0 || icmp(i, st[hi])>0)
+ hi++;
+ else if (icmp(i, st[lo])<=0)
+ hi = lo;
+ else
+ while (hi-lo!=1) {
+ mid = (lo+hi)/2;
+ if (icmp(st[mid], i)<0)
+ lo = mid;
+ else
+ hi = mid;
+ }
+ if (hi<nst && icmp(st[hi], i)==0) {
+ chg = 0;
+ i1 = st[hi];
+ for (n=0; n<i->nt; n++)
+ chg |= tsunion(&i1->ts[n].lk, &i->ts[n].lk);
+ i1->dirty |= chg;
+ *pi = i1;
+ return chg;
+ } else {
+ st = realloc(st, ++nst * sizeof st[0]);
+ if (!st)
+ die("out of memory");
+ memmove(&st[hi+1], &st[hi], (nst-1 - hi) * sizeof st[0]);
+ i->gtbl = yalloc(nsy, sizeof i->gtbl[0]);
+ i->dirty = 1;
+ i1 = yalloc(1, sizeof *i1);
+ *i1 = *i;
+ *pi = st[hi] = i1;
+ return 1;
+ }
+}
+
+void
+stgen()
+{
+ Sym s;
+ Rule *r;
+ Item *i, *i1;
+ Term tini;
+ int n, chg;
+
+ ini = &i0;
+ r = rfind(Sym0);
+ tini.rule = r;
+ tini.dot = 0;
+ tszero(&tini.lk);
+ SetBit(tini.lk.t, 0);
+ i0.nt = 0;
+ i0.ts[i0.nt++] = tini;
+ stadd(&ini);
+ do {
+ chg = 0;
+ for (n=0; n<nst; n++) {
+ i = st[n];
+ if (!i->dirty)
+ continue;
+ i->dirty = 0;
+ iclose(i);
+ for (s=0; s<nsy; s++) {
+ igoto(i, s);
+ i1 = &i0;
+ if (!i1->nt) {
+ i->gtbl[s] = 0;
+ continue;
+ }
+ chg |= stadd(&i1);
+ i->gtbl[s] = i1;
+ }
+ }
+ } while (chg);
+}
+
+int
+resolve(Rule *r, Sym s, int st)
+{
+ if (!r->prec || !is[s].prec) {
+ conflict:
+ if (fgrm)
+ fprintf(fgrm, srs, st, is[s].name);
+ srconf++;
+ return ARight;
+ }
+ if (r->prec==is[s].prec) {
+ if (is[s].assoc == ANone)
+ goto conflict;
+ return is[s].assoc;
+ } else
+ if (r->prec<is[s].prec)
+ return ARight;
+ else
+ return ALeft;
+}
+
+void
+tblset(int *tbl, Item *i, Term *t)
+{
+ int act;
+ Sym s;
+
+ s = t->rule->rhs[t->dot];
+ if (s!=S) {
+ /* shift */
+ if (s>=ntk)
+ return;
+ assert(i->gtbl[s]);
+ act = ARight;
+ if (tbl[s] && tbl[s] != i->gtbl[s]->id) {
+ assert(tbl[s]<=0);
+ act = resolve(&rs[Red(tbl[s])], s, i->id-1);
+ }
+ switch (act) {
+ case ARight:
+ tbl[s] = i->gtbl[s]->id;
+ break;
+ case ANonassoc:
+ tbl[s] = -1;
+ break;
+ }
+ } else
+ /* reduce */
+ for (s=0; s<ntk; s++) {
+ if (!GetBit(t->lk.t, s))
+ continue;
+ /* default to shift if conflict occurs */
+ if (!tbl[s])
+ act = ALeft;
+ else if (tbl[s]<0) {
+ if (fgrm)
+ fprintf(fgrm, rrs, i->id-1, is[s].name);
+ rrconf++;
+ act = ARight;
+ } else
+ act = resolve(t->rule, s, i->id-1);
+ switch (act) {
+ case ALeft:
+ tbl[s] = Red(t->rule-rs);
+ break;
+ case ANonassoc:
+ tbl[s] = -1;
+ break;
+ }
+ }
+}
+
+void
+setdef(Row *r, int w, int top)
+{
+ int n, m, x, cnt, def, max;
+
+ max = 0;
+ def = -1;
+ r->ndef = 0;
+ for (n=0; n<w; n++) {
+ x = r->t[n];
+ if (x==0)
+ r->ndef++;
+ if (x>=top || x==0)
+ continue;
+ cnt = 1;
+ for (m=n+1; m<w; m++)
+ if (r->t[m]==x)
+ cnt++;
+ if (cnt>max) {
+ def = x;
+ max = cnt;
+ }
+ }
+ r->def = def;
+ if (max!=0)
+ /* zero out the most frequent entry */
+ for (n=0; n<w; n++)
+ if (r->t[n]==def) {
+ r->t[n] = 0;
+ r->ndef++;
+ }
+}
+
+void
+tblgen()
+{
+ Row *r;
+ Item *i;
+ int n, m;
+
+ for (n=0; n<nst; n++)
+ st[n]->id = n+1;
+ as = yalloc(nst, sizeof as[0]);
+ gs = yalloc(nsy-MaxTk, sizeof gs[0]);
+ /* fill action table */
+ for (n=0; n<nst; n++) {
+ r = &as[n];
+ r->t = yalloc(ntk, sizeof r->t[0]);
+ for (i=st[n], m=0; m<i->nt; m++)
+ tblset(r->t, i, &i->ts[m]);
+ setdef(r, ntk, -1);
+ r->def = Red(r->def); /* Red(-1) == -1 */
+ }
+ /* fill goto table */
+ for (n=MaxTk; n<nsy; n++) {
+ r = &gs[n-MaxTk];
+ r->t = yalloc(nst, sizeof r->t[0]);
+ for (m=0; m<nst; m++)
+ if (st[m]->gtbl[n])
+ r->t[m] = st[m]->gtbl[n]->id;
+ setdef(r, nst, nst+1);
+ }
+}
+
+int
+prcmp(const void *a, const void *b)
+{
+ return (*(Row **)a)->ndef - (*(Row **)b)->ndef;
+}
+
+void
+actgen()
+{
+ Row **o, *r;
+ int n, m, t, dsp, nnt;
+
+ actsz = 0;
+ o = yalloc(nst+nsy, sizeof o[0]);
+ act = yalloc(nst*nsy, sizeof act[0]);
+ chk = yalloc(nst*nsy, sizeof chk[0]);
+ adsp = yalloc(nst, sizeof adsp[0]);
+ for (n=0; n<nst*nsy; n++)
+ chk[n] = -1;
+ /* fill in actions */
+ for (n=0; n<nst; n++)
+ o[n] = &as[n];
+ qsort(o, nst, sizeof o[0], prcmp);
+ for (n=0; n<nst; n++) {
+ r = o[n];
+ dsp = 0;
+ for (m=0; m<ntk && r->t[m]==0; m++)
+ dsp--;
+ retrya:
+ /* The invariant here is even
+ * trickier than it looks.
+ */
+ for (t=0; t<ntk; t++)
+ if ((m=dsp+t)>=0 && chk[m]>=0)
+ if ((r->t[t] && (chk[m]!=t || act[m]!=r->t[t]))
+ || (!r->t[t] && chk[m]==t)) {
+ dsp++;
+ goto retrya;
+ }
+ adsp[r-as] = dsp;
+ for (t=0; t<ntk; t++)
+ if (r->t[t]) {
+ chk[dsp+t] = t;
+ act[dsp+t] = r->t[t];
+ if (dsp+t>=actsz)
+ actsz = dsp+t+1;
+ }
+ }
+ /* fill in gotos */
+ nnt = nsy-MaxTk;
+ gdsp = yalloc(nnt, sizeof gdsp[0]);
+ for (n=0; n<nnt; n++)
+ o[n] = &gs[n];
+ qsort(o, nnt, sizeof o[0], prcmp);
+ for (n=0; n<nnt; n++) {
+ r = o[n];
+ dsp = 0;
+ for (m=0; m<nst && r->t[m]==0; m++)
+ dsp--;
+ retryg:
+ for (t=m; t<nst; t++)
+ if (chk[dsp+t]>=0 && r->t[t]) {
+ dsp++;
+ goto retryg;
+ }
+ gdsp[r-gs] = dsp;
+ for (t=m; t<nst; t++)
+ if (r->t[t]) {
+ chk[dsp+t] = ntk+(r-gs);
+ act[dsp+t] = r->t[t];
+ if (dsp+t>=actsz)
+ actsz = dsp+t+1;
+ }
+ }
+ free(o);
+}
+
+void
+aout(char *name, int *t, int n)
+{
+ int i;
+
+ fprintf(fout, "short %s[] = {", name);
+ for (i=0; i<n; i++) {
+ if (i % 10 == 0)
+ fprintf(fout, "\n");
+ fprintf(fout, "%4d", t[i]);
+ if (i != n-1)
+ fprintf(fout, ",");
+ }
+ fprintf(fout, "\n};\n");
+}
+
+void
+tblout()
+{
+ int *o, n, m;
+
+ fprintf(fout, "short yyini = %d;\n", ini->id-1);
+ fprintf(fout, "short yyntoks = %d;\n", ntk);
+ o = yalloc(nrl+nst+nsy, sizeof o[0]);
+ for (n=0; n<nrl; n++)
+ o[n] = slen(rs[n].rhs);
+ aout("yyr1", o, nrl);
+ for (n=0; n<nrl; n++)
+ o[n] = rs[n].lhs-MaxTk;
+ aout("yyr2", o, nrl);
+ for (n=0; n<nst; n++)
+ o[n] = as[n].def;
+ aout("yyadef", o, nst);
+ for (n=0; n<nsy-MaxTk; n++) {
+ o[n] = gs[n].def;
+ assert(o[n]>0 || o[n]==-1);
+ if (o[n]>0)
+ o[n]--;
+ }
+ aout("yygdef", o, nsy-MaxTk);
+ aout("yyadsp", adsp, nst);
+ aout("yygdsp", gdsp, nsy-MaxTk);
+ for (n=0; n<actsz; n++)
+ if (act[n]>=0)
+ act[n]--;
+ aout("yyact", act, actsz);
+ aout("yychk", chk, actsz);
+ for (n=0; n<128; n++) {
+ o[n] = 0;
+ for (m=0; m<ntk; m++)
+ if (is[m].name[0]=='\'')
+ if (is[m].name[1]==n)
+ assert(!o[n]), o[n] = m;
+ }
+ m = 128;
+ for (n=1; n<ntk; n++) {
+ if (is[n].name[0]=='\'')
+ continue;
+ fprintf(fout, "#define %s %d\n", is[n].name, m);
+ if (fhdr)
+ fprintf(fhdr, "#define %s %d\n", is[n].name, m);
+ o[m++] = n;
+ }
+ aout("yytrns", o, m);
+ if (fhdr) {
+ fputs("int yyparse(void);\n", fhdr);
+ fputs("#ifndef YYSTYPE\n", fhdr);
+ fputs("#define YYSTYPE int\n", fhdr);
+ fputs("#endif\n", fhdr);
+ fputs("extern YYSTYPE yylval;\n", fhdr);
+ fputs("#endif\n", fhdr);
+ }
+ free(o);
+}
+
+void
+stdump()
+{
+ Term *t;
+ Sym *s1;
+ int n, m, d, act;
+ Rule *r;
+ Row *ar;
+
+ if (!fgrm)
+ return;
+ for (r=rs; r-rs<nrl; r++) {
+ fprintf(fgrm, "\n%03d: %s ->", (int)(r-rs), is[r->lhs].name);
+ for (s1=r->rhs; *s1!=S; s1++)
+ fprintf(fgrm, " %s", is[*s1].name);
+ }
+ fprintf(fgrm, "\n");
+ for (m=0; m<nst; m++) {
+ fprintf(fgrm, "\nState %d:\n", m);
+ for (t=st[m]->ts; t-st[m]->ts<st[m]->nt; t++) {
+ r = t->rule;
+ d = t->dot;
+ if (d==0 && t!=st[m]->ts)
+ continue;
+ fprintf(fgrm, " %s ->", is[r->lhs].name);
+ for (s1=r->rhs; *s1!=S; s1++, d--)
+ fprintf(fgrm, " %s%s", d ? "" : ". ", is[*s1].name);
+ if (!d)
+ fprintf(fgrm, " .");
+ fprintf(fgrm, "\n");
+ }
+ fprintf(fgrm, "\n");
+ ar = &as[m];
+ for (n=0; n<ntk; n++) {
+ act = ar->t[n];
+ if (!act)
+ continue;
+ if (act==-1)
+ fprintf(fgrm, " %s error (nonassoc)\n", is[n].name);
+ else if (act<0)
+ fprintf(fgrm, " %s reduce with rule %d\n", is[n].name, Red(act));
+ else
+ fprintf(fgrm, " %s shift and go to %d\n", is[n].name, act-1);
+ }
+ if (ar->def != -1)
+ fprintf(fgrm, " * reduce with rule %d\n", ar->def);
+ }
+}
+
+enum {
+ TIdnt,
+ TTokchr, /* 'c' */
+ TPP, /* %% */
+ TLL, /* %{ */
+ TLangle, /* < */
+ TRangle, /* > */
+ TSemi, /* ; */
+ TBar, /* | */
+ TColon, /* : */
+ TLBrack, /* { */
+ TUnion,
+ TType,
+ TToken,
+ TRight,
+ TLeft,
+ TNonassoc,
+ TPrec,
+ TStart,
+ TEof
+};
+
+struct {
+ char *name;
+ int tok;
+} words[] = {
+ { "%%", TPP },
+ { "%union", TUnion },
+ { "%type", TType },
+ { "%token", TToken },
+ { "%right", TRight },
+ { "%left", TLeft },
+ { "%nonassoc", TNonassoc },
+ { "%prec", TPrec },
+ { "%start", TStart },
+ { 0, 0 }
+};
+
+char idnt[IdntSz];
+
+int
+istok(int c)
+{
+ return isalnum(c) || c=='_' || c=='%';
+}
+
+int
+nexttk()
+{
+ int n;
+ char c, *p;
+
+ while (isspace(c=fgetc(fin)))
+ if (c == '\n')
+ lineno++;
+ switch (c) {
+ case '<':
+ return TLangle;
+ case '>':
+ return TRangle;
+ case ';':
+ return TSemi;
+ case '|':
+ return TBar;
+ case ':':
+ return TColon;
+ case '{':
+ return TLBrack;
+ case EOF:
+ return TEof;
+ case '\'':
+ idnt[0] = '\'';
+ idnt[1] = fgetc(fin);
+ idnt[2] = '\'';
+ idnt[3] = 0;
+ if (fgetc(fin)!='\'')
+ die("syntax error, invalid char token");
+ return TTokchr;
+ }
+ p = idnt;
+ while (istok(c)) {
+ *p++ = c;
+ if (p-idnt >= IdntSz-1)
+ die("identifier too long");
+ c = fgetc(fin);
+ }
+ *p = 0;
+ if (strcmp(idnt, "%")==0)
+ if (c=='{')
+ return TLL;
+ ungetc(c, fin);
+ for (n=0; words[n].name; n++)
+ if (strcmp(idnt, words[n].name) == 0)
+ return words[n].tok;
+ return TIdnt;
+}
+
+char *
+cpycode()
+{
+ int c, nest, len, pos;
+ char *s;
+
+ len = 64;
+ s = yalloc(len+1, 1);
+ s[0] = '{';
+ pos = 1;
+ nest = 1;
+
+ while (nest) {
+ c = fgetc(fin);
+ if (c == '{')
+ nest++;
+ if (c == '}')
+ nest--;
+ if (c == EOF)
+ die("syntax error, unclosed code block");
+ if (c == '\n')
+ lineno++;
+ if (pos>=len)
+ if (!(s=realloc(s, len=2*len+1)))
+ die("out of memory");
+ s[pos++] = c;
+ }
+ s[pos] = 0;
+ return s;
+}
+
+int
+gettype(char *type)
+{
+ int tk;
+
+ tk = nexttk();
+ if (tk==TLangle) {
+ if (nexttk()!=TIdnt)
+ die("syntax error, ident expected after <");
+ strcpy(type, idnt);
+ if (nexttk()!=TRangle)
+ die("syntax error, unclosed <");
+ return nexttk();
+ } else {
+ type[0] = 0;
+ return tk;
+ }
+}
+
+Sym
+findsy(char *name, int add)
+{
+ int n;
+
+ for (n=0; n<nsy; n++) {
+ if (n == ntk) {
+ if (name[0]=='\'') {
+ if (ntk>=MaxTk)
+ die("too many tokens");
+ ntk++;
+ strcpy(is[n].name, name);
+ return n;
+ }
+ n = MaxTk;
+ }
+ if (strcmp(is[n].name, name)==0)
+ return n;
+ }
+ if (add) {
+ if (nsy>=MaxTk+MaxNt)
+ die("too many non-terminals");
+ strcpy(is[nsy].name, name);
+ return nsy++;
+ } else
+ return nsy;
+}
+
+void
+getdecls()
+{
+ int tk, prec, p, a, c, c1, n;
+ Info *si;
+ char type[IdntSz], *s;
+
+ strcpy(is[0].name, "$");
+ ntk = 1;
+ strcpy(is[Sym0].name, "@start");
+ nsy = MaxTk+1;
+ sstart = S;
+ prec = 0;
+ tk = nexttk();
+ for (;;)
+ switch (tk) {
+ case TStart:
+ tk = nexttk();
+ if (tk!=TIdnt)
+ die("syntax error, ident expected after %start");
+ sstart = findsy(idnt, 1);
+ if (sstart<ntk)
+ die("%start cannot specify a token");
+ tk = nexttk();
+ break;
+ case TUnion:
+ tk = nexttk();
+ if (tk!=TLBrack)
+ die("syntax error, { expected after %union");
+ fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
+ s = cpycode();
+ fprintf(fout, "typedef union %s yyunion;\n", s);
+ fprintf(fout, "#define YYSTYPE yyunion\n");
+ if (fhdr) {
+ fprintf(fhdr, "typedef union %s yyunion;\n", s);
+ fprintf(fhdr, "#define YYSTYPE yyunion\n");
+ }
+ free(s);
+ doty = 1;
+ tk = nexttk();
+ break;
+ case TLeft:
+ p = ++prec;
+ a = ALeft;
+ goto addtoks;
+ case TRight:
+ p = ++prec;
+ a = ARight;
+ goto addtoks;
+ case TNonassoc:
+ p = ++prec;
+ a = ANonassoc;
+ goto addtoks;
+ case TToken:
+ p = 0;
+ a = ANone;
+ addtoks:
+ tk = gettype(type);
+ while (tk==TIdnt || tk==TTokchr) {
+ si = 0;
+ n = findsy(idnt, 0);
+ if (n>=MaxTk && n<nsy)
+ die("non-terminal redeclared as token");
+ if (n==nsy) {
+ if (ntk>=MaxTk)
+ die("too many tokens");
+ n = ntk++;
+ }
+ si = &is[n];
+ strcpy(si->name, idnt);
+ strcpy(si->type, type);
+ si->prec = p;
+ si->assoc = a;
+ tk = nexttk();
+ }
+ break;
+ case TType:
+ tk = gettype(type);
+ if (type[0]==0)
+ die("syntax error, type expected");
+ while (tk==TIdnt) {
+ si = 0;
+ n = findsy(idnt, 1);
+ if (n<ntk)
+ die("token redeclared as non-terminal");
+ if (n==nsy) {
+ nsy++;
+ }
+ si = &is[n];
+ strcpy(si->name, idnt);
+ strcpy(si->type, type);
+ tk = nexttk();
+ }
+ break;
+ case TLL:
+ fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
+ for (;;) {
+ c = fgetc(fin);
+ if (c == EOF)
+ die("syntax error, unclosed %{");
+ if (c == '%') {
+ c1 = fgetc(fin);
+ if (c1 == '}') {
+ fputs("\n", fout);
+ break;
+ }
+ ungetc(c1, fin);
+ }
+ if (c == '\n')
+ lineno++;
+ fputc(c, fout);
+ }
+ tk = nexttk();
+ break;
+ case TPP:
+ return;
+ case TEof:
+ die("syntax error, unfinished declarations");
+ default:
+ die("syntax error, declaration expected");
+ }
+}
+
+void
+getgram()
+{
+ extern char *retcode;
+ int tk;
+ Sym hd, *p, s;
+ Rule *r;
+
+ for (;;) {
+ tk = nexttk();
+ if (tk==TPP || tk==TEof) {
+ if (sstart==S)
+ die("syntax error, empty grammar");
+ r = &rs[nrl++];
+ r->lhs = Sym0;
+ r->rhs[0] = sstart;
+ r->rhs[1] = 0;
+ r->rhs[2] = S;
+ r->act = retcode;
+ qsort(rs, nrl, sizeof rs[0], rcmp);
+ return;
+ }
+ if (tk!=TIdnt)
+ die("syntax error, production rule expected");
+ if (nexttk()!=TColon)
+ die("syntax error, colon expected after production's head");
+ hd = findsy(idnt, 1);
+ if (sstart==S)
+ sstart = hd;
+ do {
+ if (nrl>=MaxRl-1)
+ die("too many rules");
+ r = &rs[nrl++];
+ r->lhs = hd;
+ r->act = 0;
+ p = r->rhs;
+ while ((tk=nexttk())==TIdnt || tk==TTokchr || tk==TPrec) {
+ if (tk==TPrec) {
+ tk = nexttk();
+ if (tk!=TIdnt
+ || (s=findsy(idnt, 0))>=ntk)
+ die("token expected after %prec");
+ r->prec = is[s].prec;
+ continue;
+ }
+ s = findsy(idnt, 1);
+ *p++ = s;
+ if (s<ntk && is[s].prec>0)
+ r->prec = is[s].prec;
+ if (p-r->rhs >= MaxRhs-1)
+ die("production rule too long");
+ }
+ *p = S;
+ if (tk==TLBrack) {
+ r->actln = lineno;
+ r->act = cpycode();
+ tk = nexttk();
+ }
+ } while (tk==TBar);
+ if (tk!=TSemi)
+ die("syntax error, ; or | expected");
+ }
+}
+
+void
+actout(Rule *r)
+{
+ long l;
+ int i, ar;
+ char c, *p, *ty, tya[IdntSz];
+
+ ar = slen(r->rhs);
+ p = r->act;
+ i = r->actln;
+ if (!p)
+ return;
+ while ((c=*p++))
+ switch (c) {
+ case '\n':
+ i++;
+ default:
+ fputc(c, fout);
+ break;
+ case '$':
+ c = *p++;
+ if (c == '$') {
+ fprintf(fout, "yyval");
+ if (doty) {
+ ty = is[r->lhs].type;
+ if (!ty[0]) {
+ lineno = i;
+ die("$$ has no type");
+ }
+ fprintf(fout, ".%s", ty);
+ }
+ }
+ else if (c == '<') {
+ ty = tya;
+ while (istok(*p) && ty-tya<IdntSz-1)
+ *ty++ = *p++;
+ *ty = 0;
+ if (*p++!='>') {
+ lineno = i;
+ die("unclosed tag field");
+ }
+ ty = tya;
+ c = *p++;
+ if (c == '$') {
+ fprintf(fout, "yyval.%s", ty);
+ } else {
+ if (!isdigit(c)) {
+ lineno = i;
+ die("number or $ expected afer tag field");
+ }
+ goto readnum;
+ }
+ }
+ else if (isdigit(c)) {
+ ty = 0;
+ readnum:
+ l = strtol(p-1, &p, 10);
+ if (l > ar) {
+ lineno = i;
+ die("invalid $n");
+ }
+ fprintf(fout, "ps[%d].val", (int)l);
+ if (doty) {
+ if (!ty && l>0)
+ ty = is[r->rhs[l-1]].type;
+ if (!ty || !ty[0]) {
+ lineno = i;
+ die("$n has no type");
+ }
+ fprintf(fout, ".%s", ty);
+ }
+ }
+ }
+ fputs("\n", fout);
+}
+
+void
+codeout()
+{
+ extern char *code0[], *code1[];
+ char **p;
+ Rule *r;
+ int n, c;
+
+ for (p=code0; *p; p++)
+ fputs(*p, fout);
+ for (n=0; n<nrl; n++) {
+ fprintf(fout, "\tcase %d:\n", n);
+ r = &rs[n];
+ fprintf(fout, "#line %d \"%s\"\n", r->actln, srca);
+ actout(r);
+ fputs("\t\tbreak;\n", fout);
+ }
+ for (p=code1; *p; p++)
+ fputs(*p, fout);
+ fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
+ while ((c=fgetc(fin))!=EOF)
+ fputc(c, fout);
+}
+
+void
+init(int ac, char *av[])
+{
+ int c, vf, df;
+ char *pref, buf[100], *opt;
+
+ (void) ac;
+ pref = "y";
+ vf = df = 0;
+ for (av++; av[0] && av[0][0]=='-'; av++)
+ for (opt = &av[0][1]; (c = *opt); opt++)
+ switch (c) {
+ case 'v':
+ vf = 1;
+ break;
+ case 'd':
+ df = 1;
+ break;
+ case 'b':
+ if ((pref = *++av))
+ break;
+ default:
+ usage:
+ fputs("usage: myacc [-vd] [-b file_prefix] grammar\n", stderr);
+ exit(1);
+ }
+
+ if (!(srca = *av))
+ goto usage;
+ fin = fopen(srca, "r");
+ if (strlen(pref) + 10 > sizeof buf)
+ die("-b prefix too long");
+ sprintf(buf, "%s.tab.c", pref);
+ fout = fopen(buf, "w");
+ if (vf) {
+ sprintf(buf, "%s.output", pref);
+ fgrm = fopen(buf, "w");
+ }
+ if (df) {
+ sprintf(buf, "%s.tab.h", pref);
+ fhdr = fopen(buf, "w");
+ if (fhdr) {
+ fprintf(fhdr, "#ifndef Y_TAB_H_\n");
+ fprintf(fhdr, "#define Y_TAB_H_\n");
+ }
+ }
+ if (!fin || !fout || (!fgrm && vf) || (!fhdr && df))
+ die("cannot open work files");
+}
+
+int
+main(int ac, char *av[])
+{
+
+ init(ac, av);
+ getdecls();
+ getgram();
+ ginit();
+ stgen();
+ tblgen();
+ stdump();
+ actgen();
+ tblout();
+ codeout();
+
+ if (srconf)
+ fprintf(stderr, "%d shift/reduce conflicts\n", srconf);
+ if (rrconf)
+ fprintf(stderr, "%d reduce/reduce conflicts\n", rrconf);
+
+ exit(0);
+}
+
+/* Glorious macros.
+ |sed 's|.*|"&\\n",|'
+*/
+
+char *retcode = "\t\tyyval = ps[1].val; return 0;";
+
+char *code0[] = {
+"\n",
+"#ifndef YYSTYPE\n",
+"#define YYSTYPE int\n",
+"#endif\n",
+"YYSTYPE yylval;\n",
+"\n",
+"int\n",
+"yyparse()\n",
+"{\n",
+" enum {\n",
+" StackSize = 100,\n",
+" ActSz = sizeof yyact / sizeof yyact[0],\n",
+" };\n",
+" struct {\n",
+" YYSTYPE val;\n",
+" int state;\n",
+" } stk[StackSize], *ps;\n",
+" int r, h, n, s, tk;\n",
+" YYSTYPE yyval;\n",
+"\n",
+" ps = stk;\n",
+" ps->state = s = yyini;\n",
+" tk = -1;\n",
+"loop:\n",
+" n = yyadsp[s];\n",
+" if (tk < 0 && n > -yyntoks)\n",
+" tk = yytrns[yylex()];\n",
+" n += tk;\n",
+" if (n < 0 || n >= ActSz || yychk[n] != tk) {\n",
+" r = yyadef[s];\n",
+" if (r < 0)\n",
+" return -1;\n",
+" goto reduce;\n",
+" }\n",
+" n = yyact[n];\n",
+" if (n == -1)\n",
+" return -1;\n",
+" if (n < 0) {\n",
+" r = - (n+2);\n",
+" goto reduce;\n",
+" }\n",
+" tk = -1;\n",
+" yyval = yylval;\n",
+"stack:\n",
+" ps++;\n",
+" if (ps-stk >= StackSize)\n",
+" return -2;\n",
+" s = n;\n",
+" ps->state = s;\n",
+" ps->val = yyval;\n",
+" goto loop;\n",
+"reduce:\n",
+" ps -= yyr1[r];\n",
+" h = yyr2[r];\n",
+" s = ps->state;\n",
+" n = yygdsp[h] + s;\n",
+" if (n < 0 || n >= ActSz || yychk[n] != yyntoks+h)\n",
+" n = yygdef[h];\n",
+" else\n",
+" n = yyact[n];\n",
+" switch (r) {\n",
+0
+};
+
+char *code1[] = {
+" }\n",
+" goto stack;\n",
+"}\n",
+0
+};