summaryrefslogtreecommitdiff
path: root/lisp/emacs-lisp/pcase.el
blob: b922e0b0233a0927c27afb576c479b61bdacf817 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
;;; pcase.el --- ML-style pattern-matching macro for Elisp

;; Copyright (C) 2010  Free Software Foundation, Inc.

;; Author: Stefan Monnier <monnier@iro.umontreal.ca>
;; Keywords: 

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Commentary:

;; ML-style pattern matching.
;; The entry points are autoloaded.

;;; Code:

(eval-when-compile (require 'cl))

;; Macro-expansion of pcase is reasonably fast, so it's not a problem
;; when byte-compiling a file, but when interpreting the code, if the pcase
;; is in a loop, the repeated macro-expansion becomes terribly costly, so we
;; memoize previous macro expansions to try and avoid recomputing them
;; over and over again.
(defconst pcase-memoize (make-hash-table :weakness t :test 'equal))

;;;###autoload
(defmacro pcase (exp &rest cases)
  "Perform ML-style pattern matching on EXP.
CASES is a list of elements of the form (UPATTERN CODE...).

UPatterns can take the following forms:
  _		matches anything.
  SYMBOL	matches anything and binds it to SYMBOL.
  (or UPAT...)	matches if any of the patterns matches.
  (and UPAT...)	matches if all the patterns match.
  `QPAT		matches if the QPattern QPAT matches.
  (pred PRED)	matches if PRED applied to the object returns non-nil.

QPatterns can take the following forms:
  (QPAT1 . QPAT2)	matches if QPAT1 matches the car and QPAT2 the cdr.
  ,UPAT			matches if the UPattern UPAT matches.
  ATOM			matches if the object is `eq' to ATOM.
QPatterns for vectors are not implemented yet.

PRED can take the form
  FUNCTION	in which case it gets called with one argument.
  (FUN ARG1 .. ARGN) in which case it gets called with N+1 arguments.
A PRED of the form FUNCTION is equivalent to one of the form (FUNCTION).
PRED patterns can refer to variables bound earlier in the pattern.
E.g. you can match pairs where the cdr is larger than the car with a pattern
like `(,a . ,(pred (< a))) or, with more checks:
`(,(and a (pred numberp)) . ,(and (pred numberp) (pred (< a))))"
  (declare (indent 1) (debug case))
  (or (gethash (cons exp cases) pcase-memoize)
      (puthash (cons exp cases)
               (pcase-expand exp cases)
               pcase-memoize)))

;;;###autoload
(defmacro pcase-let* (bindings body)
  "Like `let*' but where you can use `pcase' patterns for bindings.
BODY should be an expression, and BINDINGS should be a list of bindings
of the form (UPAT EXP)."
  (if (null bindings) body
    `(pcase ,(cadr (car bindings))
       (,(caar bindings) (pcase-let* ,(cdr bindings) ,body))
       (t (error "Pattern match failure in `pcase-let'")))))

;;;###autoload
(defmacro pcase-let (bindings body)
  "Like `let' but where you can use `pcase' patterns for bindings.
BODY should be an expression, and BINDINGS should be a list of bindings
of the form (UPAT EXP)."
  (if (null (cdr bindings))
      `(pcase-let* ,bindings ,body)
    (setq bindings (mapcar (lambda (x) (cons (make-symbol "x") x)) bindings))
    `(let ,(mapcar (lambda (binding) (list (nth 0 binding) (nth 2 binding)))
                   bindings)
       (pcase-let*
        ,(mapcar (lambda (binding) (list (nth 1 binding) (nth 0 binding)))
                 bindings)
        ,body))))

(defun pcase-expand (exp cases)
  (let* ((defs (if (symbolp exp) '()
                 (let ((sym (make-symbol "x")))
                   (prog1 `((,sym ,exp)) (setq exp sym)))))
         (seen '())
         (codegen
          (lambda (code vars)
            (let ((prev (assq code seen)))
              (if (not prev)
                  (let ((res (pcase-codegen code vars)))
                    (push (list code vars res) seen)
                    res)
                ;; Since we use a tree-based pattern matching
                ;; technique, the leaves (the places that contain the
                ;; code to run once a pattern is matched) can get
                ;; copied a very large number of times, so to avoid
                ;; code explosion, we need to keep track of how many
                ;; times we've used each leaf and move it
                ;; to a separate function if that number is too high.
                ;;
                ;; We've already used this branch.  So it is shared.
                (destructuring-bind (code prevvars res) prev
                  (unless (symbolp res)
                    ;; This is the first repeat, so we have to move
                    ;; the branch to a separate function.
                    (let ((bsym
                           (make-symbol (format "pcase-%d" (length defs)))))
                      (push `(,bsym (lambda ,(mapcar #'car prevvars) ,@code)) defs)
                      (setcar res 'funcall)
                      (setcdr res (cons bsym (mapcar #'cdr prevvars)))
                      (setcar (cddr prev) bsym)
                      (setq res bsym)))
                  (setq vars (copy-sequence vars))
                  (let ((args (mapcar (lambda (pa)
                                        (let ((v (assq (car pa) vars)))
                                          (setq vars (delq v vars))
                                          (cdr v)))
                                      prevvars)))
                    (when vars          ;New additional vars.
                      (error "The vars %s are only bound in some paths"
                             (mapcar #'car vars)))
                    `(funcall ,res ,@args)))))))
         (main
          (pcase-u
           (mapcar (lambda (case)
                     `((match ,exp . ,(car case))
                       ,(apply-partially
                         (if (pcase-small-branch-p (cdr case))
                             ;; Don't bother sharing multiple
                             ;; occurrences of this leaf since it's small.
                             #'pcase-codegen codegen)
                         (cdr case))))
                   cases))))
    `(let ,defs ,main)))

(defun pcase-codegen (code vars)
  `(let ,(mapcar (lambda (b) (list (car b) (cdr b))) vars)
     ,@code))

(defun pcase-small-branch-p (code)
  (and (= 1 (length code))
       (or (not (consp (car code)))
           (let ((small t))
             (dolist (e (car code))
               (if (consp e) (setq small nil)))
             small))))

;; Try to use `cond' rather than a sequence of `if's, so as to reduce
;; the depth of the generated tree.
(defun pcase-if (test then else)
  (cond
   ((eq else :pcase-dontcare) then)
   ((eq (car-safe else) 'if)
    `(cond (,test ,then)
           (,(nth 1 else) ,(nth 2 else))
           (t ,@(nthcdr 3 else))))
   ((eq (car-safe else) 'cond)
    `(cond (,test ,then)
           ,@(cdr else)))
   (t `(if ,test ,then ,else))))

(defun pcase-upat (qpattern)
  (cond
   ((eq (car-safe qpattern) '\,) (cadr qpattern))
   (t (list '\` qpattern))))

;; Note about MATCH:
;; When we have patterns like `(PAT1 . PAT2), after performing the `consp'
;; check, we want to turn all the similar patterns into ones of the form
;; (and (match car PAT1) (match cdr PAT2)), so you naturally need conjunction.
;; Earlier code hence used branches of the form (MATCHES . CODE) where
;; MATCHES was a list (implicitly a conjunction) of (SYM . PAT).
;; But if we have a pattern of the form (or `(PAT1 . PAT2) PAT3), there is
;; no easy way to eliminate the `consp' check in such a representation.
;; So we replaced the MATCHES by the MATCH below which can be made up
;; of conjunctions and disjunctions, so if we know `foo' is a cons, we can
;; turn (match foo . (or `(PAT1 . PAT2) PAT3)) into
;; (or (and (match car . `PAT1) (match cdr . `PAT2)) (match foo . PAT3)).
;; The downside is that we now have `or' and `and' both in MATCH and
;; in PAT, so there are different equivalent representations and we
;; need to handle them all.  We do not try to systematically
;; canonicalize them to one form over another, but we do occasionally
;; turn one into the other.

(defun pcase-u (branches)
  "Expand matcher for rules BRANCHES.
Each BRANCH has the form (MATCH CODE . VARS) where
CODE is the code generator for that branch.
VARS is the set of vars already bound by earlier matches.
MATCH is the pattern that needs to be matched, of the form:
  (match VAR . UPAT)
  (and MATCH ...)
  (or MATCH ...)"
  (when (setq branches (delq nil branches))
    (destructuring-bind (match code &rest vars) (car branches)
      (pcase-u1 (list match) code vars (cdr branches)))))

(defun pcase-and (match matches)
  (if matches `(and ,match ,@matches) match))

(defun pcase-split-match (sym splitter match)
  (case (car match)
    ((match)
     (if (not (eq sym (cadr match)))
         (cons match match)
       (let ((pat (cddr match)))
         (cond
          ;; Hoist `or' and `and' patterns to `or' and `and' matches.
          ((memq (car-safe pat) '(or and))
           (pcase-split-match sym splitter
                              (cons (car pat)
                                    (mapcar (lambda (alt)
                                              `(match ,sym . ,alt))
                                            (cdr pat)))))
          (t (let ((res (funcall splitter (cddr match))))
               (cons (or (car res) match) (or (cdr res) match))))))))
    ((or and)
     (let ((then-alts '())
           (else-alts '())
           (neutral-elem (if (eq 'or (car match)) :pcase-fail :pcase-succeed))
           (zero-elem (if (eq 'or (car match)) :pcase-succeed :pcase-fail)))
       (dolist (alt (cdr match))
         (let ((split (pcase-split-match sym splitter alt)))
           (unless (eq (car split) neutral-elem)
             (push (car split) then-alts))
           (unless (eq (cdr split) neutral-elem)
             (push (cdr split) else-alts))))
       (cons (cond ((memq zero-elem then-alts) zero-elem)
                   ((null then-alts) neutral-elem)
                   ((null (cdr then-alts)) (car then-alts))
                   (t (cons (car match) (nreverse then-alts))))
             (cond ((memq zero-elem else-alts) zero-elem)
                   ((null else-alts) neutral-elem)
                   ((null (cdr else-alts)) (car else-alts))
                   (t (cons (car match) (nreverse else-alts)))))))
    (t (error "Uknown MATCH %s" match))))

(defun pcase-split-rest (sym splitter rest)
  (let ((then-rest '())
        (else-rest '()))
    (dolist (branch rest)
      (let* ((match (car branch))
             (code&vars (cdr branch))
             (splitted
              (pcase-split-match sym splitter match)))
        (unless (eq (car splitted) :pcase-fail)
          (push (cons (car splitted) code&vars) then-rest))
        (unless (eq (cdr splitted) :pcase-fail)
          (push (cons (cdr splitted) code&vars) else-rest))))
    (cons (nreverse then-rest) (nreverse else-rest))))

(defun pcase-split-consp (syma symd pat)
  (cond
   ;; A QPattern for a cons, can only go the `then' side.
   ((and (eq (car-safe pat) '\`) (consp (cadr pat)))
    (let ((qpat (cadr pat)))
      (cons `(and (match ,syma . ,(pcase-upat (car qpat)))
                  (match ,symd . ,(pcase-upat (cdr qpat))))
            :pcase-fail)))
   ;; A QPattern but not for a cons, can only go the `else' side.
   ((eq (car-safe pat) '\`) (cons :pcase-fail nil))))

(defun pcase-split-eq (elem pat)
  (cond
   ;; The same match will give the same result.
   ((and (eq (car-safe pat) '\`) (equal (cadr pat) elem))
    (cons :pcase-succeed :pcase-fail))
   ;; A different match will fail if this one succeeds.
   ((and (eq (car-safe pat) '\`)
         ;; (or (integerp (cadr pat)) (symbolp (cadr pat))
         ;;     (consp (cadr pat)))
         )
    (cons :pcase-fail nil))))

(defun pcase-split-memq (elems pat)
  ;; Based on pcase-split-eq.
  (cond
   ;; The same match will give the same result, but we don't know how
   ;; to check it.
   ;; (???
   ;;  (cons :pcase-succeed nil))
   ;; A match for one of the elements may succeed or fail.
   ((and (eq (car-safe pat) '\`) (member (cadr pat) elems))
    nil)
   ;; A different match will fail if this one succeeds.
   ((and (eq (car-safe pat) '\`)
         ;; (or (integerp (cadr pat)) (symbolp (cadr pat))
         ;;     (consp (cadr pat)))
         )
    (cons :pcase-fail nil))))

(defun pcase-split-pred (upat pat)
  ;; FIXME: For predicates like (pred (> a)), two such predicates may
  ;; actually refer to different variables `a'.
  (if (equal upat pat)
      (cons :pcase-succeed :pcase-fail)))

(defun pcase-fgrep (vars sexp)
  "Check which of the symbols VARS appear in SEXP."
  (let ((res '()))
    (while (consp sexp)
      (dolist (var (pcase-fgrep vars (pop sexp)))
        (unless (memq var res) (push var res))))
    (and (memq sexp vars) (not (memq sexp res)) (push sexp res))
    res))

;; It's very tempting to use `pcase' below, tho obviously, it'd create
;; bootstrapping problems.
(defun pcase-u1 (matches code vars rest)
  "Return code that runs CODE (with VARS) if MATCHES match.
and otherwise defers to REST which is a list of branches of the form
\(ELSE-MATCH ELSE-CODE . ELSE-VARS)."
  ;; Depending on the order in which we choose to check each of the MATCHES,
  ;; the resulting tree may be smaller or bigger.  So in general, we'd want
  ;; to be careful to chose the "optimal" order.  But predicate
  ;; patterns make this harder because they create dependencies
  ;; between matches.  So we don't bother trying to reorder anything.
  (cond
   ((null matches) (funcall code vars))
   ((eq :pcase-fail (car matches)) (pcase-u rest))
   ((eq :pcase-succeed (car matches))
    (pcase-u1 (cdr matches) code vars rest))
   ((eq 'and (caar matches))
    (pcase-u1 (append (cdar matches) (cdr matches)) code vars rest))
   ((eq 'or (caar matches))
    (let* ((alts (cdar matches))
           (var (if (eq (caar alts) 'match) (cadr (car alts))))
           (simples '()) (others '()))
      (when var
        (dolist (alt alts)
          (if (and (eq (car alt) 'match) (eq var (cadr alt))
                   (let ((upat (cddr alt)))
                     (and (eq (car-safe upat) '\`)
                          (or (integerp (cadr upat)) (symbolp (cadr upat))))))
              (push (cddr alt) simples)
            (push alt others))))
      (cond
       ((null alts) (error "Please avoid it") (pcase-u rest))
       ((> (length simples) 1)
        ;; De-hoist the `or' MATCH into an `or' pattern that will be
        ;; turned into a `memq' below.
        (pcase-u1 (cons `(match ,var or . ,(nreverse simples)) (cdr matches))
                  code vars
                  (if (null others) rest
                    (cons (list*
                           (pcase-and (if (cdr others)
                                          (cons 'or (nreverse others))
                                        (car others))
                                      (cdr matches))
                           code vars)
                          rest))))
       (t
        (pcase-u1 (cons (pop alts) (cdr matches)) code vars
                  (if (null alts) (progn (error "Please avoid it") rest)
                    (cons (list*
                           (pcase-and (if (cdr alts)
                                          (cons 'or alts) (car alts))
                                      (cdr matches))
                           code vars)
                          rest)))))))
   ((eq 'match (caar matches))
    (destructuring-bind (op sym &rest upat) (pop matches)
      (cond
       ((memq upat '(t _)) (pcase-u1 matches code vars rest))
       ((eq upat 'dontcare) :pcase-dontcare)
       ((functionp upat)  (error "Feature removed, use (pred %s)" upat))
       ((eq (car-safe upat) 'pred)
        (destructuring-bind (then-rest &rest else-rest)
            (pcase-split-rest
             sym (apply-partially 'pcase-split-pred upat) rest)
          (pcase-if (if (symbolp (cadr upat))
                        `(,(cadr upat) ,sym)
                      (let* ((exp (cadr upat))
                             ;; `vs' is an upper bound on the vars we need.
                             (vs (pcase-fgrep (mapcar #'car vars) exp))
                             (call (if (functionp exp)
                                       `(,exp ,sym) `(,@exp ,sym))))
                        (if (null vs)
                            call
                          ;; Let's not replace `vars' in `exp' since it's
                          ;; too difficult to do it right, instead just
                          ;; let-bind `vars' around `exp'.
                          `(let ,(mapcar (lambda (var)
                                           (list var (cdr (assq var vars))))
                                         vs)
                             ;; FIXME: `vars' can capture `sym'.  E.g.
                             ;; (pcase x ((and `(,x . ,y) (pred (fun x)))))
                             ,call))))
                    (pcase-u1 matches code vars then-rest)
                    (pcase-u else-rest))))
       ((symbolp upat)
        (pcase-u1 matches code (cons (cons upat sym) vars) rest))
       ((eq (car-safe upat) '\`)
        (pcase-q1 sym (cadr upat) matches code vars rest))
       ((eq (car-safe upat) 'or)
        (let ((all (> (length (cdr upat)) 1)))
          (when all
            (dolist (alt (cdr upat))
              (unless (and (eq (car-safe alt) '\`)
                           (or (symbolp (cadr alt)) (integerp (cadr alt))))
                (setq all nil))))
          (if all
              ;; Use memq for (or `a `b `c `d) rather than a big tree.
              (let ((elems (mapcar 'cadr (cdr upat))))
                (destructuring-bind (then-rest &rest else-rest)
                    (pcase-split-rest
                     sym (apply-partially 'pcase-split-memq elems) rest)
                  (pcase-if `(memq ,sym ',elems)
                            (pcase-u1 matches code vars then-rest)
                            (pcase-u else-rest))))
            (pcase-u1 (cons `(match ,sym ,@(cadr upat)) matches) code vars
                      (append (mapcar (lambda (upat)
                                        `((and (match ,sym . ,upat) ,@matches)
                                          ,code ,@vars))
                                      (cddr upat))
                              rest)))))
       ((eq (car-safe upat) 'and)
        (pcase-u1 (append (mapcar (lambda (upat) `(match ,sym ,@upat)) (cdr upat))
                          matches)
                  code vars rest))
       ((eq (car-safe upat) 'not)
        ;; FIXME: The implementation below is naive and results in
        ;; inefficient code.
        ;; To make it work right, we would need to turn pcase-u1's
        ;; `code' and `vars' into a single argument of the same form as
        ;; `rest'.  We would also need to split this new `then-rest' argument
        ;; for every test (currently we don't bother to do it since
        ;; it's only useful for odd patterns like (and `(PAT1 . PAT2)
        ;; `(PAT3 . PAT4)) which the programmer can easily rewrite
        ;; to the more efficient `(,(and PAT1 PAT3) . ,(and PAT2 PAT4))).
        (pcase-u1 `((match ,sym . ,(cadr upat)))
                  (lexical-let ((rest rest))
                    ;; FIXME: This codegen is not careful to share its
                    ;; code if used several times: code blow up is likely.
                    (lambda (vars)
                      ;; `vars' will likely contain bindings which are
                      ;; not always available in other paths to
                      ;; `rest', so there' no point trying to pass
                      ;; them down.
                      (pcase-u rest)))
                  vars
                  (list `((and . ,matches) ,code . ,vars))))
       (t (error "Unknown upattern `%s'" upat)))))
   (t (error "Incorrect MATCH %s" (car matches)))))

(defun pcase-q1 (sym qpat matches code vars rest)
  "Return code that runs CODE if SYM matches QPAT and if MATCHES match.
and if not, defers to REST which is a list of branches of the form
\(OTHER_MATCH OTHER-CODE . OTHER-VARS)."
  (cond
   ((eq (car-safe qpat) '\,) (error "Can't use `,UPATTERN"))
   ((floatp qpat) (error "Floating point patterns not supported"))
   ((vectorp qpat)
    ;; FIXME.
    (error "Vector QPatterns not implemented yet"))
   ((consp qpat)
    (let ((syma (make-symbol "xcar"))
          (symd (make-symbol "xcdr")))
      (destructuring-bind (then-rest &rest else-rest)
          (pcase-split-rest sym (apply-partially 'pcase-split-consp syma symd)
                            rest)
        (pcase-if `(consp ,sym)
                  `(let ((,syma (car ,sym))
                         (,symd (cdr ,sym)))
                     ,(pcase-u1 `((match ,syma . ,(pcase-upat (car qpat)))
                                  (match ,symd . ,(pcase-upat (cdr qpat)))
                                  ,@matches)
                                code vars then-rest))
                  (pcase-u else-rest)))))
   ((or (integerp qpat) (symbolp qpat))
    (destructuring-bind (then-rest &rest else-rest)
        (pcase-split-rest sym (apply-partially 'pcase-split-eq qpat) rest)
      (pcase-if `(eq ,sym ',qpat)
                (pcase-u1 matches code vars then-rest)
                (pcase-u else-rest))))
   (t (error "Unkown QPattern %s" qpat))))


(provide 'pcase)
;;; pcase.el ends here