aboutsummaryrefslogtreecommitdiff
path: root/minic/yacc.c
diff options
context:
space:
mode:
authorQuentin Carbonneaux <[email protected]>2015-10-02 15:34:38 -0400
committerQuentin Carbonneaux <[email protected]>2015-10-02 15:34:38 -0400
commit30eced928c404caae41d62f238270f00bdf7c25b (patch)
tree6f305c2d318e4a9b2dbec9f5a38a704a090c96c1 /minic/yacc.c
parentecaad8119f77601fec4cab61c510d697ec5bb939 (diff)
start an example compiler for a subset of C
Diffstat (limited to 'minic/yacc.c')
-rw-r--r--minic/yacc.c1365
1 files changed, 1365 insertions, 0 deletions
diff --git a/minic/yacc.c b/minic/yacc.c
new file mode 100644
index 0000000..52fd891
--- /dev/null
+++ b/minic/yacc.c
@@ -0,0 +1,1365 @@
+/*% clang -g -Wall -Wextra % -o #
+ * miniyacc - LALR(1) grammars for C
+ * See LICENSE for copyright and license details.
+ */
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef int Sym;
+typedef struct Rule Rule;
+typedef struct TSet TSet;
+typedef struct Info Info;
+typedef struct Term Term;
+typedef struct Item Item;
+typedef struct Row Row;
+
+#define S ((Sym) -1)
+#define Red(n) (- (n+2)) /* involutive, Red(Red(x)) == x */
+#define GetBit(s,n) (s[n/32] & (1<<(n%32)))
+#define SetBit(s,n) (s[n/32] |= 1<<(n%32))
+
+enum {
+ IdntSz = 64,
+ MaxRhs = 32,
+ MaxTk = 500,
+ MaxNt = 500,
+ MaxRl = 800,
+ MaxTm = 1000,
+
+ TSetSz = (MaxTk+31)/32,
+ Sym0 = MaxTk
+};
+
+struct Rule {
+ Sym lhs;
+ Sym rhs[MaxRhs];
+ char *act;
+ int actln;
+ int prec;
+};
+
+struct TSet {
+ unsigned t[TSetSz];
+};
+
+struct Info {
+ int nul;
+ TSet fst;
+ int prec;
+ enum {
+ ANone,
+ ALeft,
+ ARight,
+ ANonassoc
+ } assoc;
+ char name[IdntSz];
+ char type[IdntSz];
+};
+
+struct Term {
+ Rule *rule;
+ int dot;
+ TSet lk;
+};
+
+struct Item {
+ int id;
+ int nt;
+ Term ts[MaxTm];
+ Item **gtbl;
+ int dirty;
+};
+
+struct Row {
+ int def;
+ int ndef;
+ int *t;
+};
+
+char srs[] = "shift/reduce conflict state %d token %s\n";
+char rrs[] = "reduce/reduce conflict state %d token %s\n";
+
+Item i0; /* temporary item */
+
+int nrl, nsy, nst, ntk;
+Rule rs[MaxRl]; /* grammar rules (ordered, rcmp) */
+Info is[MaxTk+MaxNt]; /* symbol information */
+Item **st; /* LALR(1) states (ordered, icmp) */
+Row *as; /* action table [state][tok] */
+Row *gs; /* goto table [sym][state] */
+Sym sstart;/* start symbol */
+Item *ini; /* initial state */
+int doty; /* type-checking enabled */
+
+int srconf, rrconf;
+int actsz;
+int *act;
+int *chk;
+int *adsp;
+int *gdsp;
+
+int lineno = 1;
+char *srca;
+FILE *fin;
+FILE *fout;
+FILE *fgrm;
+FILE *fhdr;
+
+void
+die(char *s)
+{
+ fprintf(stderr, "%s (on line %d)\n", s, lineno);
+ exit(1);
+}
+
+void *
+yalloc(size_t n, size_t o)
+{
+ void *p;
+
+ p = calloc(n, o);
+ if (!p)
+ die("out of memory");
+ return p;
+}
+
+int
+rcmp(const void *a, const void *b)
+{
+ return ((Rule *)a)->lhs - ((Rule *)b)->lhs;
+}
+
+Rule *
+rfind(Sym lhs)
+{
+ Rule *r;
+ Rule k;
+
+ k.lhs = lhs;
+ r = bsearch(&k, rs, nrl, sizeof *r, rcmp);
+ if (r != 0)
+ while (r > rs && r[-1].lhs == lhs)
+ r--;
+ return r;
+}
+
+int
+slen(Sym *l)
+{
+ int n;
+
+ for (n=0; *l!=S; n++, l++);
+ return n;
+}
+
+void
+tszero(TSet *ts)
+{
+ memset(ts, 0, sizeof *ts);
+}
+
+int
+tsunion(TSet *tsa, TSet *tsb)
+{
+ int n;
+ unsigned *a, *b, c, t;
+
+ c = 0;
+ a = tsa->t;
+ b = tsb->t;
+ n = (31+ntk)/32;
+ while (n-- > 0) {
+ t = *a;
+ *a |= *b++;
+ c |= t ^ *a++;
+ }
+ return !!c;
+}
+
+void
+first(TSet *ts, Sym *stnc, TSet *last)
+{
+ Sym f;
+
+ f = stnc[0];
+ if (f == S) {
+ if (last)
+ tsunion(ts, last);
+ return;
+ }
+ if (f < ntk) {
+ SetBit(ts->t, f);
+ return;
+ }
+ if (is[f].nul)
+ first(ts, stnc+1, last);
+ tsunion(ts, &is[f].fst);
+}
+
+void
+ginit()
+{
+ int chg;
+ Rule *r;
+ Info *i;
+ Sym *s;
+ TSet ts;
+
+ do {
+ chg = 0;
+ for (r=rs; r-rs<nrl; r++) {
+ i = &is[r->lhs];
+ for (s=r->rhs; *s!=S; s++)
+ if (!is[*s].nul)
+ goto nonul;
+ chg |= i->nul == 0;
+ i->nul = 1;
+ nonul:
+ tszero(&ts);
+ first(&ts, r->rhs, 0);
+ chg |= tsunion(&i->fst, &ts);
+ }
+ } while (chg);
+}
+
+int
+tcmp(Term *a, Term *b)
+{
+ int c;
+
+ c = a->rule - b->rule;
+ if (c==0)
+ c = a->dot - b->dot;
+ return c;
+}
+
+int
+tcmpv(const void *a, const void *b)
+{
+ return tcmp((Term *)a, (Term *)b);
+}
+
+void
+iclose(Item *i)
+{
+ int smap[MaxNt];
+ Rule *r;
+ Term *t, t1;
+ Sym s, *rem;
+ int chg, n, m;
+
+ t1.dot = 0;
+ memset(smap, 0, sizeof smap);
+ for (n=0; n<i->nt; n++) {
+ t = &i->ts[n];
+ s = t->rule->lhs-Sym0;
+ if (t->dot==0)
+ if (smap[s]==0)
+ smap[s] = n;
+ }
+ do {
+ chg = 0;
+ for (n=0; n<i->nt; n++) {
+ t = &i->ts[n];
+ rem = &t->rule->rhs[t->dot];
+ s = *rem++;
+ if (s < Sym0 || s == S)
+ continue;
+ r = rfind(s);
+ if (!r)
+ die("some non-terminals are not defined");
+ tszero(&t1.lk);
+ first(&t1.lk, rem, &t->lk);
+ m = smap[s-Sym0];
+ if (m)
+ for (; r-rs<nrl && r->lhs==s; r++, m++)
+ chg |= tsunion(&i->ts[m].lk, &t1.lk);
+ else {
+ m = i->nt;
+ smap[s-Sym0] = m;
+ for (; r-rs<nrl && r->lhs==s; r++, m++) {
+ if (m>=MaxTm)
+ die("too many terms in item");
+ t1.rule = r;
+ i->ts[m] = t1;
+ }
+ i->nt = m;
+ chg = 1;
+ }
+ }
+ } while (chg);
+}
+
+void
+igoto(Item *i, Sym s)
+{
+ Term *t, *t1;
+ int n;
+
+ i0.nt = 0;
+ for (n=0, t=i->ts; n<i->nt; n++, t++) {
+ if (t->rule->rhs[t->dot] != s)
+ continue;
+ t1 = &i0.ts[i0.nt++];
+ *t1 = *t;
+ t1->dot++;
+ }
+ qsort(i0.ts, i0.nt, sizeof i0.ts[0], tcmpv);
+}
+
+int
+icmp(Item *a, Item *b)
+{
+ Term *ta, *tb, *ma, *mb;
+ int c;
+
+ ta = a->ts;
+ tb = b->ts;
+ ma = ta+a->nt;
+ mb = tb+b->nt;
+ for (;;) {
+ if (ta==ma || ta->dot==0)
+ return -(tb<mb && tb->dot);
+ if (tb==mb || tb->dot==0)
+ return +(ta<ma && ta->dot);
+ if ((c=tcmp(ta++, tb++)))
+ return c;
+ }
+}
+
+int
+stadd(Item **pi)
+{
+ Item *i, *i1;
+ int lo, hi, mid, n, chg;
+
+ /* http://www.iq0.com/duffgram/bsearch.c */
+ i = *pi;
+ lo = 0;
+ hi = nst - 1;
+ if (hi<0 || icmp(i, st[hi])>0)
+ hi++;
+ else if (icmp(i, st[lo])<=0)
+ hi = lo;
+ else
+ while (hi-lo!=1) {
+ mid = (lo+hi)/2;
+ if (icmp(st[mid], i)<0)
+ lo = mid;
+ else
+ hi = mid;
+ }
+ if (hi<nst && icmp(st[hi], i)==0) {
+ chg = 0;
+ i1 = st[hi];
+ for (n=0; n<i->nt; n++)
+ chg |= tsunion(&i1->ts[n].lk, &i->ts[n].lk);
+ i1->dirty |= chg;
+ *pi = i1;
+ return chg;
+ } else {
+ st = realloc(st, ++nst * sizeof st[0]);
+ if (!st)
+ die("out of memory");
+ memmove(&st[hi+1], &st[hi], (nst-1 - hi) * sizeof st[0]);
+ i->gtbl = yalloc(nsy, sizeof i->gtbl[0]);
+ i->dirty = 1;
+ i1 = yalloc(1, sizeof *i1);
+ *i1 = *i;
+ *pi = st[hi] = i1;
+ return 1;
+ }
+}
+
+void
+stgen()
+{
+ Sym s;
+ Rule *r;
+ Item *i, *i1;
+ Term tini;
+ int n, chg;
+
+ ini = &i0;
+ r = rfind(Sym0);
+ tini.rule = r;
+ tini.dot = 0;
+ tszero(&tini.lk);
+ SetBit(tini.lk.t, 0);
+ i0.nt = 0;
+ i0.ts[i0.nt++] = tini;
+ stadd(&ini);
+ do {
+ chg = 0;
+ for (n=0; n<nst; n++) {
+ i = st[n];
+ if (!i->dirty)
+ continue;
+ i->dirty = 0;
+ iclose(i);
+ for (s=0; s<nsy; s++) {
+ igoto(i, s);
+ i1 = &i0;
+ if (!i1->nt) {
+ i->gtbl[s] = 0;
+ continue;
+ }
+ chg |= stadd(&i1);
+ i->gtbl[s] = i1;
+ }
+ }
+ } while (chg);
+}
+
+int
+resolve(Rule *r, Sym s, int st)
+{
+ if (!r->prec || !is[s].prec) {
+ conflict:
+ if (fgrm)
+ fprintf(fgrm, srs, st, is[s].name);
+ srconf++;
+ return ARight;
+ }
+ if (r->prec==is[s].prec) {
+ if (is[s].assoc == ANone)
+ goto conflict;
+ return is[s].assoc;
+ } else
+ if (r->prec<is[s].prec)
+ return ARight;
+ else
+ return ALeft;
+}
+
+void
+tblset(int *tbl, Item *i, Term *t)
+{
+ int act;
+ Sym s;
+
+ s = t->rule->rhs[t->dot];
+ if (s!=S) {
+ /* shift */
+ if (s>=ntk)
+ return;
+ assert(i->gtbl[s]);
+ act = ARight;
+ if (tbl[s] && tbl[s] != i->gtbl[s]->id) {
+ assert(tbl[s]<=0);
+ act = resolve(&rs[Red(tbl[s])], s, i->id-1);
+ }
+ switch (act) {
+ case ARight:
+ tbl[s] = i->gtbl[s]->id;
+ break;
+ case ANonassoc:
+ tbl[s] = -1;
+ break;
+ }
+ } else
+ /* reduce */
+ for (s=0; s<ntk; s++) {
+ if (!GetBit(t->lk.t, s))
+ continue;
+ /* default to shift if conflict occurs */
+ if (!tbl[s])
+ act = ALeft;
+ else if (tbl[s]<0) {
+ if (fgrm)
+ fprintf(fgrm, rrs, i->id-1, is[s].name);
+ rrconf++;
+ act = ARight;
+ } else
+ act = resolve(t->rule, s, i->id-1);
+ switch (act) {
+ case ALeft:
+ tbl[s] = Red(t->rule-rs);
+ break;
+ case ANonassoc:
+ tbl[s] = -1;
+ break;
+ }
+ }
+}
+
+void
+setdef(Row *r, int w, int top)
+{
+ int n, m, x, cnt, def, max;
+
+ max = 0;
+ def = -1;
+ r->ndef = 0;
+ for (n=0; n<w; n++) {
+ x = r->t[n];
+ if (x==0)
+ r->ndef++;
+ if (x>=top || x==0)
+ continue;
+ cnt = 1;
+ for (m=n+1; m<w; m++)
+ if (r->t[m]==x)
+ cnt++;
+ if (cnt>max) {
+ def = x;
+ max = cnt;
+ }
+ }
+ r->def = def;
+ if (max!=0)
+ /* zero out the most frequent entry */
+ for (n=0; n<w; n++)
+ if (r->t[n]==def) {
+ r->t[n] = 0;
+ r->ndef++;
+ }
+}
+
+void
+tblgen()
+{
+ Row *r;
+ Item *i;
+ int n, m;
+
+ for (n=0; n<nst; n++)
+ st[n]->id = n+1;
+ as = yalloc(nst, sizeof as[0]);
+ gs = yalloc(nsy-MaxTk, sizeof gs[0]);
+ /* fill action table */
+ for (n=0; n<nst; n++) {
+ r = &as[n];
+ r->t = yalloc(ntk, sizeof r->t[0]);
+ for (i=st[n], m=0; m<i->nt; m++)
+ tblset(r->t, i, &i->ts[m]);
+ setdef(r, ntk, -1);
+ r->def = Red(r->def); /* Red(-1) == -1 */
+ }
+ /* fill goto table */
+ for (n=MaxTk; n<nsy; n++) {
+ r = &gs[n-MaxTk];
+ r->t = yalloc(nst, sizeof r->t[0]);
+ for (m=0; m<nst; m++)
+ if (st[m]->gtbl[n])
+ r->t[m] = st[m]->gtbl[n]->id;
+ setdef(r, nst, nst+1);
+ }
+}
+
+int
+prcmp(const void *a, const void *b)
+{
+ return (*(Row **)a)->ndef - (*(Row **)b)->ndef;
+}
+
+void
+actgen()
+{
+ Row **o, *r;
+ int n, m, t, dsp, nnt;
+
+ actsz = 0;
+ o = yalloc(nst+nsy, sizeof o[0]);
+ act = yalloc(nst*nsy, sizeof act[0]);
+ chk = yalloc(nst*nsy, sizeof chk[0]);
+ adsp = yalloc(nst, sizeof adsp[0]);
+ for (n=0; n<nst*nsy; n++)
+ chk[n] = -1;
+ /* fill in actions */
+ for (n=0; n<nst; n++)
+ o[n] = &as[n];
+ qsort(o, nst, sizeof o[0], prcmp);
+ for (n=0; n<nst; n++) {
+ r = o[n];
+ dsp = 0;
+ for (m=0; m<ntk && r->t[m]==0; m++)
+ dsp--;
+ retrya:
+ /* The invariant here is even
+ * trickier than it looks.
+ */
+ for (t=0; t<ntk; t++)
+ if ((m=dsp+t)>=0 && chk[m]>=0)
+ if ((r->t[t] && (chk[m]!=t || act[m]!=r->t[t]))
+ || (!r->t[t] && chk[m]==t)) {
+ dsp++;
+ goto retrya;
+ }
+ adsp[r-as] = dsp;
+ for (t=0; t<ntk; t++)
+ if (r->t[t]) {
+ chk[dsp+t] = t;
+ act[dsp+t] = r->t[t];
+ if (dsp+t>=actsz)
+ actsz = dsp+t+1;
+ }
+ }
+ /* fill in gotos */
+ nnt = nsy-MaxTk;
+ gdsp = yalloc(nnt, sizeof gdsp[0]);
+ for (n=0; n<nnt; n++)
+ o[n] = &gs[n];
+ qsort(o, nnt, sizeof o[0], prcmp);
+ for (n=0; n<nnt; n++) {
+ r = o[n];
+ dsp = 0;
+ for (m=0; m<nst && r->t[m]==0; m++)
+ dsp--;
+ retryg:
+ for (t=m; t<nst; t++)
+ if (chk[dsp+t]>=0 && r->t[t]) {
+ dsp++;
+ goto retryg;
+ }
+ gdsp[r-gs] = dsp;
+ for (t=m; t<nst; t++)
+ if (r->t[t]) {
+ chk[dsp+t] = ntk+(r-gs);
+ act[dsp+t] = r->t[t];
+ if (dsp+t>=actsz)
+ actsz = dsp+t+1;
+ }
+ }
+ free(o);
+}
+
+void
+aout(char *name, int *t, int n)
+{
+ int i;
+
+ fprintf(fout, "short %s[] = {", name);
+ for (i=0; i<n; i++) {
+ if (i % 10 == 0)
+ fprintf(fout, "\n");
+ fprintf(fout, "%4d", t[i]);
+ if (i != n-1)
+ fprintf(fout, ",");
+ }
+ fprintf(fout, "\n};\n");
+}
+
+void
+tblout()
+{
+ int *o, n, m;
+
+ fprintf(fout, "short yyini = %d;\n", ini->id-1);
+ fprintf(fout, "short yyntoks = %d;\n", ntk);
+ o = yalloc(nrl+nst+nsy, sizeof o[0]);
+ for (n=0; n<nrl; n++)
+ o[n] = slen(rs[n].rhs);
+ aout("yyr1", o, nrl);
+ for (n=0; n<nrl; n++)
+ o[n] = rs[n].lhs-MaxTk;
+ aout("yyr2", o, nrl);
+ for (n=0; n<nst; n++)
+ o[n] = as[n].def;
+ aout("yyadef", o, nst);
+ for (n=0; n<nsy-MaxTk; n++) {
+ o[n] = gs[n].def;
+ assert(o[n]>0 || o[n]==-1);
+ if (o[n]>0)
+ o[n]--;
+ }
+ aout("yygdef", o, nsy-MaxTk);
+ aout("yyadsp", adsp, nst);
+ aout("yygdsp", gdsp, nsy-MaxTk);
+ for (n=0; n<actsz; n++)
+ if (act[n]>=0)
+ act[n]--;
+ aout("yyact", act, actsz);
+ aout("yychk", chk, actsz);
+ for (n=0; n<128; n++) {
+ o[n] = 0;
+ for (m=0; m<ntk; m++)
+ if (is[m].name[0]=='\'')
+ if (is[m].name[1]==n)
+ assert(!o[n]), o[n] = m;
+ }
+ m = 128;
+ for (n=1; n<ntk; n++) {
+ if (is[n].name[0]=='\'')
+ continue;
+ fprintf(fout, "#define %s %d\n", is[n].name, m);
+ if (fhdr)
+ fprintf(fhdr, "#define %s %d\n", is[n].name, m);
+ o[m++] = n;
+ }
+ aout("yytrns", o, m);
+ if (fhdr) {
+ fputs("int yyparse(void);\n", fhdr);
+ fputs("#ifndef YYSTYPE\n", fhdr);
+ fputs("#define YYSTYPE int\n", fhdr);
+ fputs("#endif\n", fhdr);
+ fputs("extern YYSTYPE yylval;\n", fhdr);
+ fputs("#endif\n", fhdr);
+ }
+ free(o);
+}
+
+void
+stdump()
+{
+ Term *t;
+ Sym *s1;
+ int n, m, d, act;
+ Rule *r;
+ Row *ar;
+
+ if (!fgrm)
+ return;
+ for (r=rs; r-rs<nrl; r++) {
+ fprintf(fgrm, "\n%03d: %s ->", (int)(r-rs), is[r->lhs].name);
+ for (s1=r->rhs; *s1!=S; s1++)
+ fprintf(fgrm, " %s", is[*s1].name);
+ }
+ fprintf(fgrm, "\n");
+ for (m=0; m<nst; m++) {
+ fprintf(fgrm, "\nState %d:\n", m);
+ for (t=st[m]->ts; t-st[m]->ts<st[m]->nt; t++) {
+ r = t->rule;
+ d = t->dot;
+ if (d==0 && t!=st[m]->ts)
+ continue;
+ fprintf(fgrm, " %s ->", is[r->lhs].name);
+ for (s1=r->rhs; *s1!=S; s1++, d--)
+ fprintf(fgrm, " %s%s", d ? "" : ". ", is[*s1].name);
+ if (!d)
+ fprintf(fgrm, " .");
+ fprintf(fgrm, "\n");
+ }
+ fprintf(fgrm, "\n");
+ ar = &as[m];
+ for (n=0; n<ntk; n++) {
+ act = ar->t[n];
+ if (!act)
+ continue;
+ if (act==-1)
+ fprintf(fgrm, " %s error (nonassoc)\n", is[n].name);
+ else if (act<0)
+ fprintf(fgrm, " %s reduce with rule %d\n", is[n].name, Red(act));
+ else
+ fprintf(fgrm, " %s shift and go to %d\n", is[n].name, act-1);
+ }
+ if (ar->def != -1)
+ fprintf(fgrm, " * reduce with rule %d\n", ar->def);
+ }
+}
+
+enum {
+ TIdnt,
+ TTokchr, /* 'c' */
+ TPP, /* %% */
+ TLL, /* %{ */
+ TLangle, /* < */
+ TRangle, /* > */
+ TSemi, /* ; */
+ TBar, /* | */
+ TColon, /* : */
+ TLBrack, /* { */
+ TUnion,
+ TType,
+ TToken,
+ TRight,
+ TLeft,
+ TNonassoc,
+ TPrec,
+ TStart,
+ TEof
+};
+
+struct {
+ char *name;
+ int tok;
+} words[] = {
+ { "%%", TPP },
+ { "%union", TUnion },
+ { "%type", TType },
+ { "%token", TToken },
+ { "%right", TRight },
+ { "%left", TLeft },
+ { "%nonassoc", TNonassoc },
+ { "%prec", TPrec },
+ { "%start", TStart },
+ { 0, 0 }
+};
+
+char idnt[IdntSz];
+
+int
+istok(int c)
+{
+ return isalnum(c) || c=='_' || c=='%';
+}
+
+int
+nexttk()
+{
+ int n;
+ char c, *p;
+
+ while (isspace(c=fgetc(fin)))
+ if (c == '\n')
+ lineno++;
+ switch (c) {
+ case '<':
+ return TLangle;
+ case '>':
+ return TRangle;
+ case ';':
+ return TSemi;
+ case '|':
+ return TBar;
+ case ':':
+ return TColon;
+ case '{':
+ return TLBrack;
+ case EOF:
+ return TEof;
+ case '\'':
+ idnt[0] = '\'';
+ idnt[1] = fgetc(fin);
+ idnt[2] = '\'';
+ idnt[3] = 0;
+ if (fgetc(fin)!='\'')
+ die("syntax error, invalid char token");
+ return TTokchr;
+ }
+ p = idnt;
+ while (istok(c)) {
+ *p++ = c;
+ if (p-idnt >= IdntSz-1)
+ die("identifier too long");
+ c = fgetc(fin);
+ }
+ *p = 0;
+ if (strcmp(idnt, "%")==0)
+ if (c=='{')
+ return TLL;
+ ungetc(c, fin);
+ for (n=0; words[n].name; n++)
+ if (strcmp(idnt, words[n].name) == 0)
+ return words[n].tok;
+ return TIdnt;
+}
+
+char *
+cpycode()
+{
+ int c, nest, len, pos;
+ char *s;
+
+ len = 64;
+ s = yalloc(len+1, 1);
+ s[0] = '{';
+ pos = 1;
+ nest = 1;
+
+ while (nest) {
+ c = fgetc(fin);
+ if (c == '{')
+ nest++;
+ if (c == '}')
+ nest--;
+ if (c == EOF)
+ die("syntax error, unclosed code block");
+ if (c == '\n')
+ lineno++;
+ if (pos>=len)
+ if (!(s=realloc(s, len=2*len+1)))
+ die("out of memory");
+ s[pos++] = c;
+ }
+ s[pos] = 0;
+ return s;
+}
+
+int
+gettype(char *type)
+{
+ int tk;
+
+ tk = nexttk();
+ if (tk==TLangle) {
+ if (nexttk()!=TIdnt)
+ die("syntax error, ident expected after <");
+ strcpy(type, idnt);
+ if (nexttk()!=TRangle)
+ die("syntax error, unclosed <");
+ return nexttk();
+ } else {
+ type[0] = 0;
+ return tk;
+ }
+}
+
+Sym
+findsy(char *name, int add)
+{
+ int n;
+
+ for (n=0; n<nsy; n++) {
+ if (n == ntk) {
+ if (name[0]=='\'') {
+ if (ntk>=MaxTk)
+ die("too many tokens");
+ ntk++;
+ strcpy(is[n].name, name);
+ return n;
+ }
+ n = MaxTk;
+ }
+ if (strcmp(is[n].name, name)==0)
+ return n;
+ }
+ if (add) {
+ if (nsy>=MaxTk+MaxNt)
+ die("too many non-terminals");
+ strcpy(is[nsy].name, name);
+ return nsy++;
+ } else
+ return nsy;
+}
+
+void
+getdecls()
+{
+ int tk, prec, p, a, c, c1, n;
+ Info *si;
+ char type[IdntSz], *s;
+
+ strcpy(is[0].name, "$");
+ ntk = 1;
+ strcpy(is[Sym0].name, "@start");
+ nsy = MaxTk+1;
+ sstart = S;
+ prec = 0;
+ tk = nexttk();
+ for (;;)
+ switch (tk) {
+ case TStart:
+ tk = nexttk();
+ if (tk!=TIdnt)
+ die("syntax error, ident expected after %start");
+ sstart = findsy(idnt, 1);
+ if (sstart<ntk)
+ die("%start cannot specify a token");
+ tk = nexttk();
+ break;
+ case TUnion:
+ tk = nexttk();
+ if (tk!=TLBrack)
+ die("syntax error, { expected after %union");
+ fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
+ s = cpycode();
+ fprintf(fout, "typedef union %s yyunion;\n", s);
+ fprintf(fout, "#define YYSTYPE yyunion\n");
+ if (fhdr) {
+ fprintf(fhdr, "typedef union %s yyunion;\n", s);
+ fprintf(fhdr, "#define YYSTYPE yyunion\n");
+ }
+ free(s);
+ doty = 1;
+ tk = nexttk();
+ break;
+ case TLeft:
+ p = ++prec;
+ a = ALeft;
+ goto addtoks;
+ case TRight:
+ p = ++prec;
+ a = ARight;
+ goto addtoks;
+ case TNonassoc:
+ p = ++prec;
+ a = ANonassoc;
+ goto addtoks;
+ case TToken:
+ p = 0;
+ a = ANone;
+ addtoks:
+ tk = gettype(type);
+ while (tk==TIdnt || tk==TTokchr) {
+ si = 0;
+ n = findsy(idnt, 0);
+ if (n>=MaxTk && n<nsy)
+ die("non-terminal redeclared as token");
+ if (n==nsy) {
+ if (ntk>=MaxTk)
+ die("too many tokens");
+ n = ntk++;
+ }
+ si = &is[n];
+ strcpy(si->name, idnt);
+ strcpy(si->type, type);
+ si->prec = p;
+ si->assoc = a;
+ tk = nexttk();
+ }
+ break;
+ case TType:
+ tk = gettype(type);
+ if (type[0]==0)
+ die("syntax error, type expected");
+ while (tk==TIdnt) {
+ si = 0;
+ n = findsy(idnt, 1);
+ if (n<ntk)
+ die("token redeclared as non-terminal");
+ if (n==nsy) {
+ nsy++;
+ }
+ si = &is[n];
+ strcpy(si->name, idnt);
+ strcpy(si->type, type);
+ tk = nexttk();
+ }
+ break;
+ case TLL:
+ fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
+ for (;;) {
+ c = fgetc(fin);
+ if (c == EOF)
+ die("syntax error, unclosed %{");
+ if (c == '%') {
+ c1 = fgetc(fin);
+ if (c1 == '}') {
+ fputs("\n", fout);
+ break;
+ }
+ ungetc(c1, fin);
+ }
+ if (c == '\n')
+ lineno++;
+ fputc(c, fout);
+ }
+ tk = nexttk();
+ break;
+ case TPP:
+ return;
+ case TEof:
+ die("syntax error, unfinished declarations");
+ default:
+ die("syntax error, declaration expected");
+ }
+}
+
+void
+getgram()
+{
+ extern char *retcode;
+ int tk;
+ Sym hd, *p, s;
+ Rule *r;
+
+ for (;;) {
+ tk = nexttk();
+ if (tk==TPP || tk==TEof) {
+ if (sstart==S)
+ die("syntax error, empty grammar");
+ r = &rs[nrl++];
+ r->lhs = Sym0;
+ r->rhs[0] = sstart;
+ r->rhs[1] = 0;
+ r->rhs[2] = S;
+ r->act = retcode;
+ qsort(rs, nrl, sizeof rs[0], rcmp);
+ return;
+ }
+ if (tk!=TIdnt)
+ die("syntax error, production rule expected");
+ if (nexttk()!=TColon)
+ die("syntax error, colon expected after production's head");
+ hd = findsy(idnt, 1);
+ if (sstart==S)
+ sstart = hd;
+ do {
+ if (nrl>=MaxRl-1)
+ die("too many rules");
+ r = &rs[nrl++];
+ r->lhs = hd;
+ r->act = 0;
+ p = r->rhs;
+ while ((tk=nexttk())==TIdnt || tk==TTokchr || tk==TPrec) {
+ if (tk==TPrec) {
+ tk = nexttk();
+ if (tk!=TIdnt
+ || (s=findsy(idnt, 0))>=ntk)
+ die("token expected after %prec");
+ r->prec = is[s].prec;
+ continue;
+ }
+ s = findsy(idnt, 1);
+ *p++ = s;
+ if (s<ntk && is[s].prec>0)
+ r->prec = is[s].prec;
+ if (p-r->rhs >= MaxRhs-1)
+ die("production rule too long");
+ }
+ *p = S;
+ if (tk==TLBrack) {
+ r->actln = lineno;
+ r->act = cpycode();
+ tk = nexttk();
+ }
+ } while (tk==TBar);
+ if (tk!=TSemi)
+ die("syntax error, ; or | expected");
+ }
+}
+
+void
+actout(Rule *r)
+{
+ long l;
+ int i, ar;
+ char c, *p, *ty, tya[IdntSz];
+
+ ar = slen(r->rhs);
+ p = r->act;
+ i = r->actln;
+ if (!p)
+ return;
+ while ((c=*p++))
+ switch (c) {
+ case '\n':
+ i++;
+ default:
+ fputc(c, fout);
+ break;
+ case '$':
+ c = *p++;
+ if (c == '$') {
+ fprintf(fout, "yyval");
+ if (doty) {
+ ty = is[r->lhs].type;
+ if (!ty[0]) {
+ lineno = i;
+ die("$$ has no type");
+ }
+ fprintf(fout, ".%s", ty);
+ }
+ }
+ else if (c == '<') {
+ ty = tya;
+ while (istok(*p) && ty-tya<IdntSz-1)
+ *ty++ = *p++;
+ *ty = 0;
+ if (*p++!='>') {
+ lineno = i;
+ die("unclosed tag field");
+ }
+ ty = tya;
+ c = *p++;
+ if (c == '$') {
+ fprintf(fout, "yyval.%s", ty);
+ } else {
+ if (!isdigit(c)) {
+ lineno = i;
+ die("number or $ expected afer tag field");
+ }
+ goto readnum;
+ }
+ }
+ else if (isdigit(c)) {
+ ty = 0;
+ readnum:
+ l = strtol(p-1, &p, 10);
+ if (l > ar) {
+ lineno = i;
+ die("invalid $n");
+ }
+ fprintf(fout, "ps[%d].val", (int)l);
+ if (doty) {
+ if (!ty && l>0)
+ ty = is[r->rhs[l-1]].type;
+ if (!ty || !ty[0]) {
+ lineno = i;
+ die("$n has no type");
+ }
+ fprintf(fout, ".%s", ty);
+ }
+ }
+ }
+ fputs("\n", fout);
+}
+
+void
+codeout()
+{
+ extern char *code0[], *code1[];
+ char **p;
+ Rule *r;
+ int n, c;
+
+ for (p=code0; *p; p++)
+ fputs(*p, fout);
+ for (n=0; n<nrl; n++) {
+ fprintf(fout, "\tcase %d:\n", n);
+ r = &rs[n];
+ fprintf(fout, "#line %d \"%s\"\n", r->actln, srca);
+ actout(r);
+ fputs("\t\tbreak;\n", fout);
+ }
+ for (p=code1; *p; p++)
+ fputs(*p, fout);
+ fprintf(fout, "#line %d \"%s\"\n", lineno, srca);
+ while ((c=fgetc(fin))!=EOF)
+ fputc(c, fout);
+}
+
+void
+init(int ac, char *av[])
+{
+ int c, vf, df;
+ char *pref, buf[100], *opt;
+
+ (void) ac;
+ pref = "y";
+ vf = df = 0;
+ for (av++; av[0] && av[0][0]=='-'; av++)
+ for (opt = &av[0][1]; (c = *opt); opt++)
+ switch (c) {
+ case 'v':
+ vf = 1;
+ break;
+ case 'd':
+ df = 1;
+ break;
+ case 'b':
+ if ((pref = *++av))
+ break;
+ default:
+ usage:
+ fputs("usage: myacc [-vd] [-b file_prefix] grammar\n", stderr);
+ exit(1);
+ }
+
+ if (!(srca = *av))
+ goto usage;
+ fin = fopen(srca, "r");
+ if (strlen(pref) + 10 > sizeof buf)
+ die("-b prefix too long");
+ sprintf(buf, "%s.tab.c", pref);
+ fout = fopen(buf, "w");
+ if (vf) {
+ sprintf(buf, "%s.output", pref);
+ fgrm = fopen(buf, "w");
+ }
+ if (df) {
+ sprintf(buf, "%s.tab.h", pref);
+ fhdr = fopen(buf, "w");
+ if (fhdr) {
+ fprintf(fhdr, "#ifndef Y_TAB_H_\n");
+ fprintf(fhdr, "#define Y_TAB_H_\n");
+ }
+ }
+ if (!fin || !fout || (!fgrm && vf) || (!fhdr && df))
+ die("cannot open work files");
+}
+
+int
+main(int ac, char *av[])
+{
+
+ init(ac, av);
+ getdecls();
+ getgram();
+ ginit();
+ stgen();
+ tblgen();
+ stdump();
+ actgen();
+ tblout();
+ codeout();
+
+ if (srconf)
+ fprintf(stderr, "%d shift/reduce conflicts\n", srconf);
+ if (rrconf)
+ fprintf(stderr, "%d reduce/reduce conflicts\n", rrconf);
+
+ exit(0);
+}
+
+/* Glorious macros.
+ |sed 's|.*|"&\\n",|'
+*/
+
+char *retcode = "\t\tyyval = ps[1].val; return 0;";
+
+char *code0[] = {
+"\n",
+"#ifndef YYSTYPE\n",
+"#define YYSTYPE int\n",
+"#endif\n",
+"YYSTYPE yylval;\n",
+"\n",
+"int\n",
+"yyparse()\n",
+"{\n",
+" enum {\n",
+" StackSize = 100,\n",
+" ActSz = sizeof yyact / sizeof yyact[0],\n",
+" };\n",
+" struct {\n",
+" YYSTYPE val;\n",
+" int state;\n",
+" } stk[StackSize], *ps;\n",
+" int r, h, n, s, tk;\n",
+" YYSTYPE yyval;\n",
+"\n",
+" ps = stk;\n",
+" ps->state = s = yyini;\n",
+" tk = -1;\n",
+"loop:\n",
+" n = yyadsp[s];\n",
+" if (tk < 0 && n > -yyntoks)\n",
+" tk = yytrns[yylex()];\n",
+" n += tk;\n",
+" if (n < 0 || n >= ActSz || yychk[n] != tk) {\n",
+" r = yyadef[s];\n",
+" if (r < 0)\n",
+" return -1;\n",
+" goto reduce;\n",
+" }\n",
+" n = yyact[n];\n",
+" if (n == -1)\n",
+" return -1;\n",
+" if (n < 0) {\n",
+" r = - (n+2);\n",
+" goto reduce;\n",
+" }\n",
+" tk = -1;\n",
+" yyval = yylval;\n",
+"stack:\n",
+" ps++;\n",
+" if (ps-stk >= StackSize)\n",
+" return -2;\n",
+" s = n;\n",
+" ps->state = s;\n",
+" ps->val = yyval;\n",
+" goto loop;\n",
+"reduce:\n",
+" ps -= yyr1[r];\n",
+" h = yyr2[r];\n",
+" s = ps->state;\n",
+" n = yygdsp[h] + s;\n",
+" if (n < 0 || n >= ActSz || yychk[n] != yyntoks+h)\n",
+" n = yygdef[h];\n",
+" else\n",
+" n = yyact[n];\n",
+" switch (r) {\n",
+0
+};
+
+char *code1[] = {
+" }\n",
+" goto stack;\n",
+"}\n",
+0
+};