summaryrefslogtreecommitdiff
path: root/Makefile.menhir
blob: fc13e5e70cc2c655d2344621063211f7b0174f04 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
#**************************************************************************
#*                                                                        *
#*                                 OCaml                                  *
#*                                                                        *
#*            Gabriel Scherer, projet Parsifal, INRIA Saclay              *
#*                                                                        *
#*   Copyright 2018 Institut National de Recherche en Informatique et     *
#*     en Automatique.                                                    *
#*                                                                        *
#*   All rights reserved.  This file is distributed under the terms of    *
#*   the GNU Lesser General Public License version 2.1, with the          *
#*   special exception on linking described in the file LICENSE.          *
#*                                                                        *
#**************************************************************************

# The rules in this Makefile use Menhir to rebuild the OCaml compiler
# parser. They are included in the main Makefile, so should be invoked
# directly, for example 'make promote-menhir'. They must be called
# after any modification to parsing/parser.mly, for the modification
# to affect the parser linked in the produced compiler:
#
# - promote-menhir builds the parser from parser.mly and stores it in
#   the boot/ directory, so that future builds of the compiler use the
#   updated result. Use it to make permanent changes to the compiler
#   parser.
#
# - demote-menhir undoes the effect of promote-menhir. The files in
#   the boot/ directory that are affected by promote-menhir and are
#   under version control are restored to their normal state (HEAD).
#
# - test-menhir builds the parser from parser.mly without storing it
#   in the boot/ directory, and only checks that the generated parser
#   builds correctly. Use it to quickly check if a parser.mly change
#   breaks the build. If you want to test a compiler produced with
#   the new parser, you must use promote-menhir instead.
#   (Using this rule requires a partial compiler build as obtained
#    by 'make core' or 'make world'.)
#
# - clean-menhir removes the files generated by Menhir from parsing/,
#   keeping only the reference sources for the grammar.
#
# - depend-menhir updates the dependency information for the
#   Menhir-generated parser, which is versioned in the OCaml repository
#   like all other .depend files. It should be used when the dependencies
#   (of the OCaml code in the grammar semantic actions) change.

# Command used to invoke Menhir. The conditional assignment (?=) keeps any
# value already provided, e.g. by the environment or an earlier definition.
MENHIR ?= menhir

## Unused tokens

# The tokens COMMENT, DOCSTRING and EOL are produced by special lexer
# modes used by other consumers than the parser.

# GREATERRBRACKET ">]" was added by the parser by symmetry with "[<"
# (which is used in polymorphic variants), but is not currently used by
# the grammar.

# Each token listed here is passed to Menhir with --unused-token
# (see MENHIRBASICFLAGS).
unused_tokens := COMMENT DOCSTRING EOL GREATERRBRACKET

## Menhir's flags.

# The basic flags influence the analysis of the grammar and the construction
# of the automaton. The complete set of flags includes extra flags that
# influence type inference and code production.

# Flags common to every Menhir invocation in this file. The last line
# contributes one "--unused-token TOKEN" pair per entry of $(unused_tokens).
MENHIRBASICFLAGS := --lalr --explain --dump --require-aliases --strict
MENHIRBASICFLAGS += -lg 1 -la 1
MENHIRBASICFLAGS += $(foreach token,$(unused_tokens),--unused-token $(token))

# Complete flag set: the basic flags plus those that drive type inference
# and code production. The variable is simply expanded, so CAMLC,
# OC_COMMON_COMPFLAGS and INCLUDES are read at the point where this file
# is parsed.
MENHIRFLAGS := $(MENHIRBASICFLAGS)
MENHIRFLAGS += --infer
MENHIRFLAGS += --ocamlc "$(CAMLC) $(OC_COMMON_COMPFLAGS) $(INCLUDES)"
MENHIRFLAGS += --fixed-exception --table
MENHIRFLAGS += --strategy simplified

## promote-menhir

# Regenerate the parser from parsing/parser.mly and move the result into
# boot/menhir/. The menhirLib sources are re-imported first so that the
# promoted parser and the imported library come from the same Menhir.
.PHONY: promote-menhir
promote-menhir: parsing/parser.mly
	@ $(MAKE) import-menhirLib
	$(MENHIR) $(MENHIRFLAGS) parsing/parser.mly
# The generated parser.ml may contain lexer directives containing
# the absolute path to Menhir's standard library on the promoter's machine.
# This is benign but will generate pointless churn if another developer
# rebuilds the same grammar (from the same Menhir version).
#
# Each generated file is removed from parsing/ only after sed has
# successfully written its copy to boot/menhir/; the first failure
# aborts the recipe instead of being silently ignored.
	@ for f in $(addprefix parser.,ml mli) ; do \
	  sed \
	    's,^#\(.*\)"[^"]*/menhir/standard.mly",#\1"menhir/standard.mly",g' \
	    parsing/$$f \
	    > boot/menhir/$$f && \
	  rm parsing/$$f || exit 1; \
	done

# The import-menhirLib invocation in promote-menhir ensures that each
# update of the boot/ parser is paired with an update of the imported
# menhirLib; otherwise it would be easy to generate a parser and keep
# an incompatible version of menhirLib, which would fail at
# compile-time.

# Copy menhirLib.ml and menhirLib.mli from the directory reported by
# 'menhir --suggest-menhirLib' into boot/menhir/.
# Menhir is queried once; if the query fails the recipe stops before cp
# runs, and the reported directory is quoted so that a path containing
# spaces is handled.
.PHONY: import-menhirLib
import-menhirLib:
	@ mkdir -p boot/menhir
	@ libdir=`$(MENHIR) --suggest-menhirLib` && \
	  cp "$$libdir/menhirLib.ml" "$$libdir/menhirLib.mli" boot/menhir


## demote-menhir

# Files under boot/menhir/ that are restored from HEAD by demote-menhir.
DEMOTE := menhirLib.ml menhirLib.mli parser.ml parser.mli

# Check the files listed in DEMOTE out of HEAD again.
.PHONY: demote-menhir
demote-menhir:
	git checkout HEAD -- $(DEMOTE:%=boot/menhir/%)

## test-menhir

# This rule assumes that the `parsing/` sources and its dependencies
# have already been compiled; 'make core' suffices to be in that
# state. We don't make 'core' an explicit dependency, as building
# 'test-menhir' repeatedly would rebuild the compiler each time
# (parser.ml has changed), without actually taking the changes from
# parser.mly into account ('core' uses the parser from boot/).

# The test-menhir target does not read or write the boot directory,
# it directly builds the parser in parsing/. In particular, it must
# duplicate the MenhirLib->CamlinternalMenhirLib renaming usually
# performed by the parsing/parser.ml import rule in the main
# Makefile.
#
# sed reads each generated file directly, so a missing file is an error
# rather than an empty replacement. The result is written to a temporary
# file and moved into place; on any failure the temporary file is removed
# and the recipe stops.
.PHONY: test-menhir
test-menhir: parsing/parser.mly
	$(MENHIR) $(MENHIRFLAGS) parsing/parser.mly
	for f in $(addprefix parsing/parser.,ml mli) ; do \
	  sed "s/MenhirLib/CamlinternalMenhirLib/g" $$f > $$f.tmp && \
	  mv $$f.tmp $$f || { rm -f $$f.tmp ; exit 1 ; } ; \
	done
	$(MAKE) parsing/parser.cmo


## clean-menhir

# Both targets are commands, not files: declare them phony so that a stray
# file with either name cannot interfere with cleaning.
.PHONY: partialclean-menhir clean-menhir

# Remove the Menhir-generated files from parsing/: the parser sources,
# the renamed runtime library, and Menhir's automaton, conflicts and
# auto.messages outputs.
partialclean-menhir::
	rm -f \
	  $(addprefix parsing/parser.,ml mli) \
	  $(addprefix parsing/camlinternalMenhirLib.,ml mli) \
	  $(addprefix parsing/parser.,automaton conflicts) \
	  $(addprefix parsing/parser.,auto.messages)

clean-menhir: partialclean-menhir


## depend-menhir

# The following rule depends on the OCAMLDEP_CMD variable defined in
# Makefile.common, so it can only be invoked from the main (root) Makefile

# Regenerate .depend.menhir from the grammar.
# Menhir's output goes to a temporary file that replaces .depend.menhir
# only on success, so a failed run cannot leave the included dependency
# file truncated or half-written.
.PHONY: depend-menhir
depend-menhir:
	$(MENHIR) --depend --ocamldep "$(OCAMLDEP_CMD)" \
	  parsing/parser.mly > .depend.menhir.tmp && \
	mv .depend.menhir.tmp .depend.menhir || \
	{ rm -f .depend.menhir.tmp ; exit 1 ; }

include .depend.menhir

## interpret-menhir

# This rule runs Menhir in interactive mode.
# The user can enter sentences, such as:
#   implementation: TYPE LIDENT EQUAL LIDENT EOF
# and see how Menhir interprets them.

# interpret-menhir names a command, not a file, so it is declared phony.
.PHONY: interpret-menhir
interpret-menhir:
	@ echo "Please wait, I am building the LALR automaton..."
	@ $(MENHIR) $(MENHIRBASICFLAGS) parsing/parser.mly \
	    --interpret \
	    --interpret-show-cst \
	    --trace

## list-parse-errors

# This rule runs Menhir's reachability analysis, which produces a list of all
# states where a syntax error can be detected (and a corresponding list of
# erroneous sentences). This data is stored in parsing/parser.auto.messages.
# This analysis requires about 3 minutes and 6GB of RAM.

# The analysis is performed on a copy of the grammar where every block
# of text comprised between the markers BEGIN AVOID and END AVOID has
# been removed. This allows us to avoid certain syntactic forms in the
# sentences that we produce. See parser.mly for more explanations.

# Because of this, we must run Menhir twice: once on a modified copy of the
# grammar to produce the sentences, and once on the original grammar to update
# the auto-comments (which would otherwise be incorrect).

# Step 1: run --list-errors on a temporary copy of the grammar from which
#         the BEGIN AVOID ... END AVOID regions have been deleted.
#         The temporary directory is removed whether or not this step
#         succeeds, and the step's exit status is preserved.
# Step 2: run --update-errors on the original grammar, using a .bak copy of
#         the step-1 output as input, to rewrite parser.auto.messages.
.PHONY: list-parse-errors
list-parse-errors:
	@ tmp=`mktemp -d /tmp/parser.XXXX` && { \
	  sed -e '/BEGIN AVOID/,/END AVOID/d' \
	    parsing/parser.mly > "$$tmp/parser.mly" && \
	  $(MENHIR) $(MENHIRBASICFLAGS) "$$tmp/parser.mly" \
	    --list-errors -la 2 \
	    > parsing/parser.auto.messages ; \
	  status=$$? ; \
	  rm -rf "$$tmp" ; \
	  exit $$status ; \
	}
	@ cp parsing/parser.auto.messages parsing/parser.auto.messages.bak
	@ $(MENHIR) $(MENHIRBASICFLAGS) parsing/parser.mly \
	    --update-errors parsing/parser.auto.messages.bak \
	    > parsing/parser.auto.messages
	@ rm -f parsing/parser.auto.messages.bak

## generate-parse-errors

# This rule assumes that [make list-parse-errors] has been run first.

# This rule turns the error sentences stored in parsing/parser.auto.messages
# into one .ml file.

# (It would in principle be preferable to create one file per sentence, but
# that would be much slower. We abuse the ability of the OCaml toplevel to
# resynchronize after an error, and put all sentences into a single file.)

# This requires Menhir 20201214 or newer.

# GPE_DIR:   test directory, relative to testsuite/.
# GPE_ML:    file (inside that directory) receiving the generated sentences.
# GPE_REF:   reference-output file (inside that directory) that is created
#            if missing and later inspected by classify-parse-errors.
# GPE_START: start symbols for which generated sentences are counted.
GPE_DIR   := tests/generated-parse-errors
GPE_ML    := errors.ml
GPE_REF   := errors.compilers.reference
GPE_START := implementation use_file toplevel_phrase

# First recipe: ask Menhir to echo every sentence of
# parsing/parser.auto.messages in symbolic and concrete form (two lines per
# sentence), and turn that stream into testsuite/$(GPE_DIR)/$(GPE_ML).
# The test header is emitted with printf, whose handling of \n is the same
# in every shell (echo's is not).
#
# Second recipe: ask whether the expected output should be regenerated.
# It is written in plain POSIX sh (printf / read -r / case), because Make
# runs recipes with /bin/sh, which is not necessarily bash; the answer must
# therefore be confirmed with Enter. Sub-makes are started through $(MAKE)
# so that the same make program and its flags are used.
.PHONY: generate-parse-errors
generate-parse-errors:
	@ \
	mkdir -p testsuite/$(GPE_DIR) && \
	$(MENHIR) $(MENHIRBASICFLAGS) parsing/parser.mly \
	    --echo-errors-concrete parsing/parser.auto.messages 2>/dev/null | \
	(cd testsuite/$(GPE_DIR) && touch $(GPE_REF) && ( \
	  printf '(* TEST\n   * toplevel\n*)\n' && \
	  while IFS= read -r symbolic ; do \
	    IFS= read -r concrete ; \
	    concrete=$${concrete#### Concrete syntax: } ; \
	    : '$$symbolic is the sentence in symbolic form' ; \
	    : '$$concrete is the sentence in concrete form' ; \
	    case "$$symbolic" in \
	    *": SEMISEMI"*) \
	      : 'If the sentence begins with SEMISEMI, ignore it. Our hack' ; \
	      : 'does not support these sentences, and there are only 6 of' ; \
	      : 'them anyway.' ; \
	      continue ;; \
	    *) \
	      case "$$symbolic" in \
	      *"EOF") \
	        : 'If the sentence ends with EOF, replace it on the fly' ; \
	        : 'with some other token (say, WHEN).' ; \
	        echo "#0 \"$${symbolic%%EOF}WHEN\"" ; \
	        echo "$$concrete when"   ; \
	        echo ";;"                ;; \
	      *) \
	        : 'Emit a # directive containing the symbolic sentence.' ; \
	        echo "#0 \"$$symbolic\"" ; \
	        : 'Emit the concrete sentence.' ; \
	        echo "$$concrete"        ; \
	        : 'Emit a double semicolon to allow resynchronization.' ; \
	        echo ";;"                ;; \
	      esac \
	    esac \
	  done) \
	  > $(GPE_ML) && \
	  : 'Count how many sentences we have emitted, per start symbol.' ; \
	  for symbol in $(GPE_START) ; do \
	    count=$$(grep -h -e "$$symbol:" $(GPE_ML) | wc -l) && \
	    echo "$$count sentences whose start symbol is $$symbol." ; \
	  done \
	)
	@ \
	printf '%s' "Re-generate the expected output for this test? " && \
	read -r REPLY && \
	case "$$REPLY" in \
	[Yy]) \
	  $(MAKE) -C testsuite promote DIR=$(GPE_DIR) >/dev/null 2>&1 && \
	  echo "Done." ; \
	  $(MAKE) classify-parse-errors ;; \
	*) \
	  echo "OK, stop." ;; \
	esac

# Print a short summary of testsuite/$(GPE_DIR)/$(GPE_REF): the number of
# lines reporting a syntax error, and how many of them come without an
# explanation, mention what was expected, or mention an unmatched delimiter.
.PHONY: classify-parse-errors
classify-parse-errors:
	@ cd testsuite/$(GPE_DIR) && \
	echo "The parser's output can be described as follows:" && \
	reported=$$(grep "^Error: Syntax error" $(GPE_REF) | wc -l) && \
	echo "$${reported} syntax errors reported." && \
	bare=$$(grep "^Error: Syntax error$$" $(GPE_REF) | wc -l) && \
	echo "$${bare} errors without an explanation." && \
	hinted=$$(grep "^Error: Syntax" $(GPE_REF) | grep expected | wc -l) && \
	echo "$${hinted} errors with an indication of what was expected." && \
	unmatched=$$(grep "might be unmatched" $(GPE_REF) | wc -l) && \
	echo "$${unmatched} errors with an indication of an unmatched delimiter."