aboutsummaryrefslogtreecommitdiff
path: root/tools/mgen/main.ml
blob: fb49259e2f199c206fbdf14ddc1596f7865f174c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
open Cgen
open Match

(** [mgen ~verbose ~fuzz path lofs input oc] parses the rule text [input],
    builds the match tables and one matcher per rule name, and emits the
    resulting C code on [oc].

    - [verbose]: progress messages whose level is at most [verbose] are
      written to stderr (the default message level is 1).
    - [fuzz]: when true, runs [Fuzz.fuzz_numberer] and [Fuzz.test_matchers]
      on the generated tables before emitting.
    - [path], [lofs]: only used to report parse errors; the reported line
      is [lofs] plus the line given by the parser.

    A parse error is printed on stderr and terminates the process with
    exit code 1. A rule name that reappears with different variables
    trips an [assert]. *)
let mgen ~verbose ~fuzz path lofs input oc =
  (* Formatted logging on stderr; messages above the verbosity threshold
     are type-checked but discarded through [ifprintf]. *)
  let info ?(level = 1) fmt =
    if level > verbose then
      Printf.ifprintf stdout fmt
    else
      Printf.eprintf fmt
  in

  let parsed =
    match Sexp.(run_parser ppats) input with
    | `Ok parsed -> parsed
    | `Error (ps, err, loc) ->
        Printf.eprintf "%s:%d:%d %s\n"
          path (lofs + ps.Sexp.line) ps.Sexp.coln err;
        Printf.eprintf "%s" loc;
        exit 1
  in

  info "adding ac variants...%!";
  let nparsed =
    parsed
    |> List.map (fun (_, _, ps) -> List.length ps)
    |> List.fold_left ( + ) 0
  in
  let varsmap = Hashtbl.create 10 in
  (* Records the variables of a rule name (checking they agree with any
     earlier occurrence of that name) and turns each of its patterns,
     after [ac_equiv] expansion, into a rule record. *)
  let expand (name, vars, patterns) =
    (match Hashtbl.find_opt varsmap name with
     | Some known -> assert (known = vars)
     | None -> ());
    Hashtbl.replace varsmap name vars;
    patterns
    |> List.concat_map ac_equiv
    |> List.map (fun pattern -> {name; vars; pattern})
  in
  let rules = List.concat_map expand parsed in
  info " %d -> %d patterns\n"
    nparsed (List.length rules);

  let rnames =
    let name_of r = r.name in
    setify (List.map name_of rules)
  in

  info "generating match tables...%!";
  let sa, am, sm = generate_table rules in
  let numbr = make_numberer sa am sm in
  info " %d states, %d rules\n"
    (Array.length sa) (StateMap.cardinal sm);
  if verbose >= 2 then begin
    info "-------------\nstates:\n";
    let show_state i s =
      info "  state %d: %s\n" i (show_pattern s.seen) in
    Array.iteri show_state sa;
    info "-------------\nstatemap:\n";
    Test.print_sm stderr sm;
    info "-------------\n";
  end;

  info "generating matchers...\n";
  (* Builds the matcher for one rule name, logging its size and, at
     verbosity 2, its pretty-printed form. *)
  let build rname =
    info "+ %s...%!" rname;
    let m = lr_matcher sm sa rules rname in
    let vars = Hashtbl.find varsmap rname in
    info " %d nodes\n" (Action.size m);
    info ~level:2 "  -------------\n";
    info ~level:2 "  automaton:\n";
    info ~level:2 "%s\n"
      (Format.asprintf "    @[%a@]" Action.pp m);
    info ~level:2 "  ----------\n";
    (vars, rname, m)
  in
  let matchers = List.map build rnames in

  if fuzz then begin
    info ~level:0 "fuzzing statemap...\n";
    let tp = Fuzz.fuzz_numberer rules numbr in
    info ~level:0 "testing %d patterns...\n"
      (List.length rules);
    Fuzz.test_matchers tp numbr rules
  end;

  info "emitting C...\n";
  (* Push the log out before the generated code is written. *)
  flush stderr;

  let cgopts = { pfx = ""; static = true; oc } in
  emit_c cgopts numbr;
  emit_matchers cgopts matchers;

  ()

(** [read_all ic] reads [ic] until end of file and returns everything
    that was read as a single string. The channel is left open. *)
let read_all ic =
  let chunk_len = 4096 in
  let chunk = Bytes.create chunk_len in
  let acc = Buffer.create chunk_len in
  (* [input] returns 0 only at end of file; any other count is the
     number of bytes placed at the start of [chunk]. *)
  let rec drain () =
    match input ic chunk 0 chunk_len with
    | 0 -> ()
    | n ->
        Buffer.add_subbytes acc chunk 0 n;
        drain ()
  in
  drain ();
  Buffer.contents acc

(** [split_c src] cuts the text of a C file around its mgen section and
    returns [(lofs, pfx, rules, sfx)].

    The section starts at the first line containing
    [mgen generated code]; the rules are the following lines up to (not
    including) the first line containing [*/]; everything after that is
    dropped until a line containing [end of generated code].

    - [lofs] is the number of lines up to and including the start line.
    - [pfx] is every line up to and including the [*/] line, followed
      by a blank line.
    - [rules] is the rule lines joined with newlines.
    - [sfx] is the end marker line and everything after it.

    @raise Failure if the section start, the end of the rules, or the
    end marker cannot be found. *)
let split_c src =
  let begin_marker = Str.regexp "mgen generated code" in
  let comment_close = Str.regexp "\\*/" in
  let end_marker = Str.regexp "end of generated code" in
  (* True when [marker] occurs anywhere in [text]. *)
  let has marker text =
    match Str.search_forward marker text 0 with
    | (_ : int) -> true
    | exception Not_found -> false
  in
  let glue = String.concat "\n" in

  (* One scanner per state; [seen] and [rules] accumulate lines in
     reverse order. *)
  let rec scan_prefix seen = function
    | [] -> failwith "could not find mgen section"
    | l :: rest ->
        let seen = l :: seen in
        if has begin_marker l
        then scan_rules (List.length seen) seen [] rest
        else scan_prefix seen rest
  and scan_rules lofs seen rules = function
    | [] -> failwith "mgen rules not terminated"
    | l :: rest ->
        let seen = l :: seen in
        if has comment_close l
        then skip_generated lofs seen rules rest
        else scan_rules lofs seen (l :: rules) rest
  and skip_generated lofs seen rules = function
    | [] -> failwith "mgen section not terminated"
    | l :: rest ->
        if has end_marker l then
          ( lofs
          , glue (List.rev seen) ^ "\n\n"
          , glue (List.rev rules)
          , glue (l :: rest) )
        else skip_generated lofs seen rules rest
  in

  scan_prefix [] (String.split_on_char '\n' src)

(* Command-line entry point.

   Usage: mgen [--fuzz] [--verbose <N>] <file>

   Exactly one input is expected; [-] reads the rules from stdin.
   When the input path ends in [.c], the file is split with [split_c],
   the generated code is written between its prefix and suffix into
   [<file>.tmp], and that file is then renamed over the input.
   Otherwise the whole input is treated as rules and the generated
   code goes to stdout. *)
let () =
  let usage_msg =
    "mgen [--fuzz] [--verbose <N>] <file>" in

  let fuzz_arg = ref false in
  let verbose_arg = ref 0 in
  let input_paths = ref [] in

  let anon_fun filename =
    input_paths := filename :: !input_paths in

  let speclist =
    [ ( "--fuzz", Arg.Set fuzz_arg
      , "  Fuzz tables and matchers" )
    ; ( "--verbose", Arg.Set_int verbose_arg
      , "<N>  Set verbosity level" )
    ; ( "--", Arg.Rest_all (List.iter anon_fun)
      , "  Stop argument parsing" ) ]
  in
  Arg.parse speclist anon_fun usage_msg;

  let input_paths = !input_paths in
  let verbose = !verbose_arg in
  let fuzz = !fuzz_arg in
  let input_path, input =
    match input_paths with
    | ["-"] -> ("-", read_all stdin)
    | [path] ->
        (* Close the input channel even if reading fails. *)
        let ic = open_in path in
        let contents =
          Fun.protect
            ~finally:(fun () -> close_in_noerr ic)
            (fun () -> read_all ic)
        in
        (path, contents)
    | _ ->
        Printf.eprintf
          "%s: single input file expected\n"
          Sys.argv.(0);
        Arg.usage speclist usage_msg; exit 1
  in
  let mgen = mgen ~verbose ~fuzz in

  (* Length-guarded suffix test: taking the last two characters
     unconditionally raises [Invalid_argument] on paths shorter than
     two characters, which includes the [-] used for stdin. *)
  let is_c_file =
    let len = String.length input_path in
    len >= 2 && String.sub input_path (len - 2) 2 = ".c"
  in

  if not is_c_file
  then mgen input_path 0 input stdout
  else
    let tmp_path = input_path ^ ".tmp" in
    Fun.protect
      ~finally:(fun () ->
          (* After a successful rename the temporary file is gone and
             the removal fails; that failure is expected and ignored. *)
          try Sys.remove tmp_path with Sys_error _ -> ())
      (fun () ->
         let lofs, pfx, rules, sfx = split_c input in
         let oc = open_out tmp_path in
         Fun.protect
           ~finally:(fun () -> close_out_noerr oc)
           (fun () ->
              output_string oc pfx;
              mgen input_path lofs rules oc;
              output_string oc sfx;
              (* Explicit close so that a flush error is reported
                 before the rename; the [finally] close is then a
                 no-op. *)
              close_out oc);
         Sys.rename tmp_path input_path;
         ());

  ()