summaryrefslogtreecommitdiff
path: root/lisp/progmodes/cc-awk.el
blob: dbcfa9d991ef29ad0a1fe6aed536ce73d2fe49db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
;;; cc-awk.el --- AWK specific code within cc-mode.

;; Copyright (C) 1988,94,96,2000,01,02,03  Free Software Foundation, Inc.

;; Author: Alan Mackenzie (originally based on awk-mode.el)
;; Maintainer: FSF
;; Keywords: AWK, cc-mode, unix, languages

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;; This file contains (most of) the adaptations to cc-mode required for the
;; integration of AWK Mode.
;; It is organised thusly:
;;   1. The AWK Mode syntax table.
;;   2. Indentation calculation stuff ("c-awk-NL-prop text-property").
;;   3. Syntax-table property/font-locking stuff, but not including the
;;      font-lock-keywords setting.
;;   4. The AWK Mode before/after-change-functions.
;;   5. AWK Mode specific versions of commands like beginning-of-defun.
;; The AWK Mode keymap, abbreviation table, and the mode function itself are
;; in cc-mode.el.

;;; Code:

;; At compile time only, load the "cc-bytecomp" helper library.  When
;; `byte-compile-dest-file' is bound to a string, the directory of that file
;; is put at the front of `load-path' for the duration of the load, so that
;; directory is searched first.
(eval-when-compile
  (let ((load-path
	 (if (and (boundp 'byte-compile-dest-file)
		  (stringp byte-compile-dest-file))
	     (cons (file-name-directory byte-compile-dest-file) load-path)
	   load-path)))
    (load "cc-bytecomp" nil t)))

;; cc-defs supplies the c-* macros and functions used throughout this file.
(cc-require 'cc-defs)

;; Silence the byte compiler.
(cc-bytecomp-defvar font-lock-mode)	; Checked with boundp before use.

;; Some functions in cc-engine that are used below.  There's a cyclic
;; dependency so it can't be required here.  (Perhaps some functions
;; could be moved to cc-engine to avoid it.)
(cc-bytecomp-defun c-backward-token-1)
(cc-bytecomp-defun c-beginning-of-statement-1)
(cc-bytecomp-defun c-backward-sws)

(defvar awk-mode-syntax-table
  (let ((table (make-syntax-table))
        ;; Characters which terminate a # comment.
        (comment-enders '(?\n ?\r ?\f))
        ;; Characters given plain punctuation syntax.  / can delimit regexes
        ;; or be a division operator.  By default we assume that it is a
        ;; division sign, and fix the regexp operator cases with
        ;; `font-lock-syntactic-keywords'.  (ACM 2002/4/27.)
        (punctuation '(?/ ?* ?+ ?- ?= ?% ?< ?> ?& ?| ?\')))
    ;; Backslash is the escape character; # opens a comment.
    (modify-syntax-entry ?\\ "\\" table)
    (modify-syntax-entry ?\# "<   " table)
    (while comment-enders
      (modify-syntax-entry (car comment-enders) ">   " table)
      (setq comment-enders (cdr comment-enders)))
    (while punctuation
      (modify-syntax-entry (car punctuation) "." table)
      (setq punctuation (cdr punctuation)))
    ;; Underscore is a symbol constituent.
    (modify-syntax-entry ?_ "_" table)
    table)
  "Syntax table in use in AWK Mode buffers.")

;; ACM, 2002/5/29:
;; 
;; The next section of code is about determining whether or not an AWK
;; statement is complete or not.  We use this to indent the following line.
;; The determination is pretty straightforward in C, where a statement ends
;; with either a ; or a }.  Only "while" really gives any trouble there, since
;; it might be the end of a do-while.  In AWK, on the other hand, semicolons
;; are rarely used, and EOLs _usually_ act as "virtual semicolons".  In
;; addition, we have the complexity of escaped EOLs.  The core of this
;; analysis is in the middle of the function
;; c-awk-calculate-NL-prop-prev-line, about 130 lines lower down.
;;
;; To avoid continually repeating this expensive analysis, we "cache" its
;; result in a text-property, c-awk-NL-prop, whose value for a line is set on
;; the EOL (if any) which terminates that line.  Should the property be
;; required for the very last line (which has no EOL), it is calculated as
;; required but not cached.  The c-awk-NL-prop property should be thought of
;; as only really valid immediately after a buffer change, not a permanently
;; set property.  (By contrast, the syntax-table text properties (set by an
;; after-change function) must be constantly updated for the mode to work
;; properly).
;;
;; The valid values for c-awk-NL-prop are:
;;
;; nil The property is not currently set for this line.
;; '#' There is NO statement on this line (at most a comment), and no open
;;     statement from a previous line which could have been completed on this
;;     line.
;; '{' There is an unfinished statement on this (or a previous) line which
;;     doesn't require \s to continue onto another line, e.g. the line ends
;;     with {, or the && operator, or "if (condition)".  Note that even if the
;;     newline is redundantly escaped, it remains a '{' line.
;; '\' There is an escaped newline at the end of this line and this '\' is
;;     essential to the syntax of the program.  (i.e. if it had been a
;;     frivolous \, it would have been ignored and the line been given one of
;;     the other property values.)
;; ';' A statement is completed as the last thing (aside from ws) on the line -
;;     i.e. there is (at least part of) a statement on this line, and the last
;;     statement on the line is complete, OR (2002/10/25) the line is
;;     content-free but terminates a statement from the preceding (continued)
;;     line (which has property \).
;;
;; This set of values has been chosen so that the property's value on a line
;; is completely determined by the contents of the line and the property on
;; the previous line, EXCEPT for where a "while" might be the closing
;; statement of a do-while.

(defun c-awk-after-if-for-while-condition-p (&optional do-lim)
  ;; Return non-nil when point is directly after the closing paren of the
  ;; condition in "if (...)", "for (...)" or "while (...)".
  ;;
  ;; The ) ending the "while (...)" of a do-while does not count: this
  ;; predicate exists to decide whether the following line is to be indented
  ;; as a continuation.
  ;;
  ;; DO-LIM is passed to `c-beginning-of-statement-1' as the limit of the
  ;; backward search for the "do" of a possible do-while.
  (when (eq (char-before) ?\))
    (save-excursion
      (let ((open-paren (c-safe (scan-lists (point) -1 0))))
        (when open-paren
          (goto-char open-paren)        ; now at the "(" of "(...)"
          (c-backward-token-1)          ; BOB isn't a problem.
          (cond
           ((looking-at "\\(if\\|for\\)\\>\\([^_]\\|$\\)")
            t)
           ((looking-at "while\\>\\([^_]\\|$\\)")
            ;; A `beginning' result is taken to mean this "while" closes a
            ;; do-while, which must be rejected.
            (not (eq (c-beginning-of-statement-1 do-lim)
                     'beginning)))))))))

(defun c-awk-after-function-decl-param-list ()
  ;; Return non-nil when point is directly after the ) which closes the
  ;; parameter list in "function foo (bar)" (or "func foo (bar)").
  (when (eq (char-before) ?\))
    (save-excursion
      (let ((open-paren (c-safe (scan-lists (point) -1 0))))
        (when open-paren
          (goto-char open-paren)        ; now at the "(" of "(...)"
          (c-backward-token-1)          ; BOB isn't a problem
          ;; Expect an identifier here, preceded by the function keyword.
          (when (looking-at "[_a-zA-Z][_a-zA-Z0-9]*\\>")
            (c-backward-token-1)
            (looking-at "func\\(tion\\)?\\>")))))))

;; 2002/11/8:  FIXME!  Check c-backward-token-1/2 for success (0 return code).
(defun c-awk-after-continue-token ()
  ;; Return non-nil when point is directly after a token after which the
  ;; statement may carry on onto the next line without a backslash: one of
  ;; , { ? : && || do else.
  (save-excursion
    (c-backward-token-1)              ; FIXME 2002/10/27.  What if this fails?
    ;; c-backward-token-1 doesn't step back over the first character of a
    ;; doubled & or |, so do it by hand.  :-(
    (when (and (looking-at "[&|]") (not (bobp)))
      (backward-char))
    (looking-at "[,{?:]\\|&&\\|||\\|do\\>\\|else\\>")))

(defun c-awk-after-rbrace-or-statement-semicolon ()
  ;; Return non-nil when point is directly after a } or after a ; which
  ;; closes a statement.  The ;s inside the control part of a "for (;;)" do
  ;; NOT count.
  (cond
   ((eq (char-before) ?\})
    t)
   ((eq (char-before) ?\;)
    (save-excursion
      (let ((encl-pos (c-safe (scan-lists (point) -1 1))))
        (when encl-pos
          (goto-char encl-pos)          ; at the enclosing opener
          ;; Reject the ; only if the opener is the ( of a "for".
          (not (and (looking-at "(")
                    (c-backward-token-1) ; BOB isn't a problem
                    (looking-at "for\\>")))))))))

(defun c-awk-back-to-contentful-text-or-NL-prop ()
  ;;  Move back to just after the first found of either (i) an EOL which has
  ;;  the c-awk-NL-prop text-property set; or (ii) non-ws text; or (iii) BOB.
  ;;  We return either the value of c-awk-NL-prop (in case (i)) or nil.
  ;;  Calling function can best distinguish cases (ii) and (iii) with (bolp).
  ;;
  ;;  Note that an escaped eol counts as whitespace here.
  ;;
  ;;  Kludge: If c-backward-syntactic-ws gets stuck at a BOL, it is likely
  ;;  that the previous line contains an unterminated string (without \).  In
  ;;  this case, assume that the previous line's c-awk-NL-prop is a ;.
  ;;  (So in this kludge case the return value is ?\; even though no
  ;;  property was actually found, and point is left at the BOL.)
  ;; 
  ;;  POINT MUST BE AT THE START OF A LINE when calling this function.  This
  ;;  is to ensure that the various backward-comment functions will work
  ;;  properly.
  (let ((nl-prop nil)
        bol-pos bsws-pos) ; starting pos for a backward-syntactic-ws call.
    ;; All the work is done in the loop condition; the loop has no body.
    ;; Each clause of the `and' either ends the loop (by yielding nil) or
    ;; moves point backwards ready for the next clause.
    (while ;; We are at a BOL here.  Go back one line each iteration.
        (and
         ;; Case (iii): nothing more to scan.
         (not (bobp))
         ;; Case (i): the preceding EOL already carries a cached property.
         (not (setq nl-prop (c-get-char-property (1- (point)) 'c-awk-NL-prop)))
         (progn (setq bol-pos (c-point 'bopl))
                (setq bsws-pos (point))
                ;; N.B. the following function will not go back past an EOL if
                ;; there is an open string (without \) on the previous line.
                (c-backward-syntactic-ws bol-pos)
                ;; Point didn't move => the kludge described above applies.
                (or (/= (point) bsws-pos)
                    (progn (setq nl-prop ?\;)
                           nil)))
         ;; If we had a backslash at EOL, c-backward-syntactic-ws will
         ;; have gone backwards over it.  Check the backslash was "real".
         (progn
           (if (looking-at "[ \t]*\\\\+$")
               (if (progn
                     (end-of-line)
                     (search-backward-regexp
                      "\\(^\\|[^\\]\\)\\(\\\\\\\\\\)*\\\\$" ; ODD number of \s at EOL  :-)
                      bol-pos t))
                   ;; Genuinely escaped EOL: treat the \ as whitespace and
                   ;; skip backwards again from just before it.
                   (progn (end-of-line)   ; escaped EOL.
                          (backward-char)
                          (c-backward-syntactic-ws bol-pos))
                 (end-of-line)))          ; The \ at eol is a fake.
           ;; Carry on looping only if the whole line was content-free, i.e.
           ;; we ended up at its BOL.  Otherwise we're in case (ii).
           (bolp))))
    nl-prop))

(defun c-awk-calculate-NL-prop-prev-line (&optional do-lim)
  ;; Calculate and set the value of the c-awk-NL-prop on the immediately
  ;; preceding EOL.  This may also involve doing the same for several
  ;; preceding EOLs.
  ;; 
  ;; NOTE that if the property was already set, we return it without
  ;; recalculation.  (This is by accident rather than design.)
  ;; 
  ;; Return the property which got set (or was already set) on the previous
  ;; line.  Return nil if we hit BOB.
  ;; 
  ;; Point and the match data are preserved.
  ;; 
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  (save-excursion
    (save-match-data
      (beginning-of-line)
      ;; POS is the BOL we started from; every EOL between the place the
      ;; backward scan stops and POS gets its property set below.
      (let* ((pos (point))
             (nl-prop (c-awk-back-to-contentful-text-or-NL-prop)))
        ;; We are either (1) at a BOL (with nl-prop containing the previous
        ;; line's c-awk-NL-prop) or (2) after contentful text on a line.  At
        ;; the BOB counts as case (1), so we test next for bolp rather than
        ;; non-nil nl-prop.
        (when (not (bolp))
          ;; Case (2): classify the contentful line from what precedes point.
          (setq nl-prop
                (cond
                 ;; Incomplete statement which doesn't require escaped EOL?
                 ((or (c-awk-after-if-for-while-condition-p do-lim)
                      (c-awk-after-function-decl-param-list)
                      (c-awk-after-continue-token))
                  ?\{)
                 ;; Escaped EOL (where there's also something to continue)?
                 ((and (looking-at "[ \t]*\\\\$")
                       (not (c-awk-after-rbrace-or-statement-semicolon)))
                  ?\\)
                 (t ?\;)))            ; A statement was completed on this line
          ;; Cache the result on this line's EOL, then move to the next line.
          (end-of-line)
          (c-put-char-property (point) 'c-awk-NL-prop nl-prop)
          (forward-line))

        ;; We are now at a (possibly empty) sequence of content-free lines.
        ;; Set c-awk-NL-prop on each of these lines's EOL.
        (while (< (point) pos)         ; one content-free line each iteration.
          (cond              ; recalculate nl-prop from previous line's value.
           ;; After a completed statement (or at BOB) an empty line is
           ;; just an empty line.
           ((memq nl-prop '(?\; nil)) (setq nl-prop ?\#))
           ;; After an essential \, a content-free line which isn't itself
           ;; continued with a \ terminates the statement.
           ((eq nl-prop ?\\)
            (if (not (looking-at "[ \t]*\\\\$")) (setq nl-prop ?\;))) ; was ?\#  2002/10/25
           ;; ?\# (empty line) and ?\{ (open stmt) don't change.
           )
          (forward-line)
          ;; After forward-line, (1- (point)) is the EOL just passed.
          (c-put-char-property (1- (point)) 'c-awk-NL-prop nl-prop))
        nl-prop))))

(defun c-awk-get-NL-prop-prev-line (&optional do-lim)
  ;; Return the c-awk-NL-prop text-property of the previous line's EOL,
  ;; working it out (and caching it) first if it isn't yet set.  The result
  ;; is nil iff point is already at BOB.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  (unless (bobp)
    (let ((cached (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop)))
      (or cached
          (c-awk-calculate-NL-prop-prev-line do-lim)))))

(defun c-awk-get-NL-prop-cur-line (&optional do-lim)
  ;; Get the c-awk-NL-prop text-property from the current line, calculating it
  ;; if necessary. (As a special case, the property doesn't get set on an
  ;; empty line at EOB (there's no position to set the property on), but the
  ;; function returns the property value an EOL would have got.)
  ;;
  ;; When the current line is the last one in the buffer, a newline is
  ;; temporarily inserted at EOB so that the "previous line" machinery can be
  ;; reused.  That newline is always removed again, even if the analysis
  ;; signals an error or exits non-locally.
  ;;
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  (save-excursion
    (let ((extra-nl-pos nil))         ; position of the artificial EOL, if any
      (end-of-line)                ; Necessary for the following test to work.
      (when (= (forward-line) 1)        ; if we were on the last line....
        ;; Record the position BEFORE inserting, so that nothing is scheduled
        ;; for deletion should the insertion itself fail.
        (setq extra-nl-pos (point))
        (insert-char ?\n 1)) ; ...artificial eol is needed for comment detection.
      (unwind-protect
          (c-awk-get-NL-prop-prev-line do-lim)
        ;; Remove the artificial EOL by position rather than relative to
        ;; point, and with a primitive rather than the user-level command
        ;; `delete-backward-char'.
        (when extra-nl-pos
          (delete-region extra-nl-pos (1+ extra-nl-pos)))))))

(defun c-awk-prev-line-incomplete-p (&optional do-lim)
  ;; Non-nil if the previous line ends in the middle of a statement, i.e. its
  ;; c-awk-NL-prop is either \ or {.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  (let ((prev-prop (c-awk-get-NL-prop-prev-line do-lim)))
    (memq prev-prop '(?\\ ?\{))))

(defun c-awk-cur-line-incomplete-p (&optional do-lim)
  ;; Non-nil if the current line ends in the middle of a statement, i.e. its
  ;; c-awk-NL-prop is either \ or {.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  (let ((cur-prop (c-awk-get-NL-prop-cur-line do-lim)))
    (memq cur-prop '(?\\ ?\{))))

(defun c-awk-completed-stmt-ws-ends-prev-line-p (&optional do-lim)
  ;; Non-nil if the last thing on the previous line (ignoring an optional
  ;; comment) is the end of a statement, i.e. its c-awk-NL-prop is ;.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  (let ((prev-prop (c-awk-get-NL-prop-prev-line do-lim)))
    (eq prev-prop ?\;)))

(defun c-awk-completed-stmt-ws-ends-line-p (&optional pos do-lim)
  ;; Like c-awk-completed-stmt-ws-ends-prev-line-p, but for the line
  ;; containing position POS (or the current line if POS is omitted).  Point
  ;; is not moved.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  (save-excursion
    (when pos
      (goto-char pos))
    (let ((cur-prop (c-awk-get-NL-prop-cur-line do-lim)))
      (eq cur-prop ?\;))))

(defun c-awk-after-logical-semicolon (&optional do-lim)
  ;; Non-nil if point is at BOL and the EOL before it is a "logical
  ;; semicolon", i.e. the previous line's c-awk-NL-prop is ;.
  ;; See c-awk-after-if-for-while-condition-p for a description of DO-LIM.
  (when (bolp)
    (eq (c-awk-get-NL-prop-prev-line do-lim) ?\;)))

(defun c-awk-backward-syntactic-ws (&optional lim)
  ;; Skip backwards over awk-syntactic whitespace.  This is whitespace
  ;; characters, comments, and NEWLINES WHICH AREN'T "VIRTUAL SEMICOLONS".
  ;; For this function, a newline isn't a "virtual semicolon" if that line
  ;; ends with a real semicolon (or closing brace).
  ;; However if point starts inside a comment or preprocessor directive, the
  ;; content of it is not treated as whitespace.  LIM (optional) sets a limit
  ;; on the backward movement; it defaults to (point-min).
  (let ((lim (or lim (point-min)))
        after-real-br)
    ;; First deal with whitespace on the current line only.
    (c-backward-syntactic-ws (max lim (c-point 'bol)))
    (while                    ; go back one WS line each time round this loop.
        (and (bolp)
             (> (point) lim)
             ;; Stop at a virtual semicolon.  The property is compared with
             ;; `eq' rather than `/=' because the getter returns nil at BOB
             ;; (reachable here when LIM lies before the start of a narrowed
             ;; buffer), and `/=' would signal on a nil argument.
             (not (eq (c-awk-get-NL-prop-prev-line) ?\;))
             (/= (point)
                 ;; The following function requires point at BONL [not EOL] to
                 ;; recognise a preceding comment.
                 (progn (c-backward-syntactic-ws (max lim (c-point 'bopl)))
                        (point)))))
    ;; Does the previous line end with a real ; or }?  If so, go back to it.
    (if (and (bolp)
             (eq (c-awk-get-NL-prop-prev-line) ?\;)
             (save-excursion
               (c-backward-syntactic-ws (max lim (c-point 'bopl)))
               (setq after-real-br (point))
               (c-awk-after-rbrace-or-statement-semicolon)))
        (goto-char after-real-br))))

(defun c-awk-NL-prop-not-set ()
  ;; Return t if the c-awk-NL-prop at the current line's EOL is nil or
  ;; absent, nil otherwise.
  (let ((cur-prop (c-get-char-property (c-point 'eol) 'c-awk-NL-prop)))
    (null cur-prop)))

(defun c-awk-clear-NL-props (beg end)
  ;; Run from before-change-hooks.  Remove the c-awk-NL-prop text property
  ;; from everything between BEG and the (widened) end of the buffer, so that
  ;; the indentation engine never sees a stale value.  END is accepted
  ;; because of the hook's calling convention, but is not used.
  (save-restriction
    (widen)
    (let ((real-eob (point-max)))
      (c-clear-char-properties beg real-eob 'c-awk-NL-prop))))

(defun c-awk-unstick-NL-prop ()
  ;; Make the text property c-awk-NL-prop "non-sticky", by registering it in
  ;; `text-property-default-nonsticky' unless it is already there.  Otherwise
  ;; a newline inserted straight after an existing one (e.g. by C-j) would
  ;; inherit the old newline's c-awk-NL-prop, which would be a Bad Thing.
  ;; This function's action is required by c-put-char-property.
  (when (boundp 'text-property-default-nonsticky) ; doesn't exist in XEmacs
    (unless (assoc 'c-awk-NL-prop text-property-default-nonsticky)
      (setq text-property-default-nonsticky
            (cons '(c-awk-NL-prop . t) text-property-default-nonsticky)))))

;; The following is purely a diagnostic command, to be commented out of the
;; final release.  ACM, 2002/6/1
;; (defun NL-props ()
;;   (interactive)
;;   (let (pl-prop cl-prop)
;;     (message "Prev-line: %s  Cur-line: %s"
;;              (if (setq pl-prop (c-get-char-property (c-point 'eopl) 'c-awk-NL-prop))
;;                  (char-to-string pl-prop)
;;                "nil")
;;              (if (setq cl-prop (c-get-char-property (c-point 'eol) 'c-awk-NL-prop))
;;                  (char-to-string cl-prop)
;;                "nil"))))
;(define-key awk-mode-map [?\C-c ?\r] 'NL-props) ; commented out, 2002/8/31
;for now.  In the byte compiled version, this causes things to crash because
;awk-mode-map isn't yet defined.  :-(

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; The following section of the code is to do with font-locking.  The biggest
;; problem for font-locking is deciding whether a / is a regular expression
;; delimiter or a division sign - determining precisely where strings and
;; regular expressions start and stop is also troublesome.  This is the
;; purpose of the function c-awk-set-syntax-table-properties and the myriad
;; elisp regular expressions it uses.
;;
;; Because AWK is a line oriented language, I felt the normal cc-mode strategy
;; for font-locking unterminated strings (i.e. font-locking the buffer up to
;; the next string delimiter as a string) was inappropriate.  Instead,
;; unbalanced string/regexp delimiters are given the warning font, being
;; refonted with the string font as soon as the matching delimiter is entered.
;;
;; This requires the region processed by the current font-lock after-change
;; function to have access to the start of the string/regexp, which may be
;; several lines back.  The elisp "advice" feature is used on these functions
;; to allow this.

(defun c-awk-beginning-of-logical-line (&optional pos)
  ;; Move to the start of the (apparent) logical line containing point (or
  ;; containing POS, if given) and return that buffer position.  That is, go
  ;; back to the nearest line start which isn't preceded by a backslash
  ;; followed by EOL.
  ;;
  ;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
  ;; comment, string or regexp.  IT MAY WELL BE that this function should not
  ;; be executed on a narrowed buffer.
  (when pos
    (goto-char pos))
  (forward-line 0)
  ;; At a BOL, the character two before point is the last one on the
  ;; previous line; keep going back while that is a backslash.
  (while (and (not (bobp))
              (eq (char-after (- (point) 2)) ?\\))
    (forward-line -1))
  (point))

(defun c-awk-end-of-logical-line (&optional pos)
  ;; Move to the end of the (apparent) logical line containing point (or
  ;; containing POS, if given) and return that buffer position.  That is, go
  ;; forward to the nearest line end which isn't preceded by a backslash.
  ;;
  ;; This is guaranteed to be "safe" for syntactic analysis, i.e. outwith any
  ;; comment, string or regexp.  IT MAY WELL BE that this function should not
  ;; be executed on a narrowed buffer.
  (when pos
    (goto-char pos))
  (end-of-line)
  ;; While this EOL is escaped, carry on to the end of the following line.
  (while (and (not (eobp))
              (eq (char-before) ?\\))
    (end-of-line 2))
  (point))

;; N.B. In the following regexps, an EOL is either \n OR \r.  This is because
;; Emacs has in the past used \r to mark hidden lines in some fashion (and
;; maybe still does).

(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)")
;;   Matches any escaped (with \) character-pair, including an escaped newline.
;; A lone \ at the very end of the buffer is matched too.
(defconst c-awk-comment-without-nl "#.*")
;; Matches an AWK comment, not including the terminating NL (if any).  Note
;; that the "enclosing" (elisp) regexp must ensure the # is real.
(defconst c-awk-nl-or-eob "\\(\n\\|\r\\|\\'\\)")
;; Matches a newline (\n or \r), or the end of buffer.

;; "Space" regular expressions.
(defconst c-awk-escaped-nl "\\\\[\n\r]") 
;; Matches an escaped newline, i.e. a \ followed by \n or \r.
(defconst c-awk-escaped-nls* (concat "\\(" c-awk-escaped-nl "\\)*"))
;; Matches a possibly empty sequence of escaped newlines.  Used in
;; awk-font-lock-keywords.
;; (defconst c-awk-escaped-nls*-with-space*
;;   (concat "\\(" c-awk-escaped-nls* "\\|" "[ \t]+" "\\)*"))
;; The above RE was very slow.  Its runtime was doubling with each additional
;; space :-(  (It nests a repetition inside a repetition.)  Reformulate it as
;; below, where each alternative matches exactly one item:
(defconst c-awk-escaped-nls*-with-space*
  (concat "\\(" c-awk-escaped-nl "\\|" "[ \t]" "\\)*"))
;; Matches a possibly empty sequence of escaped newlines with optional
;; interspersed spaces and tabs.  Used in awk-font-lock-keywords.

;; REGEXPS FOR "HARMLESS" STRINGS/LINES.
(defconst c-awk-harmless-char-re "[^_#/\"\\\\\n\r]")
;;   Matches any character but a _, #, /, ", \, or newline.  N.B. _" starts a
;; localisation string in gawk 3.1
(defconst c-awk-harmless-_ "_\\([^\"]\\|\\'\\)")
;;   Matches an underline NOT followed by ".  Note that the character after
;; the _ (if there is one) is part of the match.
(defconst c-awk-harmless-string*-re
  (concat "\\(" c-awk-harmless-char-re "\\|" c-awk-esc-pair-re "\\|" c-awk-harmless-_ "\\)*"))
;;   Matches a (possibly empty) sequence of chars without unescaped /, ", \,
;; #, or newlines.
(defconst c-awk-harmless-string*-here-re
  (concat "\\=" c-awk-harmless-string*-re))
;; Matches the (possibly empty) sequence of chars without unescaped /, ", \,
;; #, or newlines which starts at point.
(defconst c-awk-harmless-line-re
  (concat c-awk-harmless-string*-re
          "\\(" c-awk-comment-without-nl "\\)?" c-awk-nl-or-eob))
;;   Matches (the tail of) an AWK "logical" line not containing an unescaped
;; " or /.  "logical" means "possibly containing escaped newlines".  A comment
;; is matched as part of the line even if it contains a " or a /.  The End of
;; buffer is also an end of line.
(defconst c-awk-harmless-lines+-here-re
  (concat "\\=\\(" c-awk-harmless-line-re "\\)+"))
;; Matches a sequence of (at least one) "harmless-line" at point.


;; REGEXPS FOR AWK STRINGS.
(defconst c-awk-string-ch-re "[^\"\\\n\r]")
;; Matches any character which can appear unescaped in a string, i.e.
;; anything but ", \ or a newline.
(defconst c-awk-string-innards-re
  (concat "\\(" c-awk-string-ch-re "\\|" c-awk-esc-pair-re "\\)*"))
;;   Matches the inside of an AWK string (i.e. without the enclosing quotes).
(defconst c-awk-string-without-end-here-re
  (concat "\\=_?\"" c-awk-string-innards-re))
;;   Matches an AWK string at point up to, but not including, any terminator.
;; A gawk 3.1+ string may look like _"localisable string".

;; REGEXPS FOR AWK REGEXPS.
(defconst c-awk-regexp-normal-re "[^[/\\\n\r]")
;;   Matches any AWK regexp character which doesn't require special analysis,
;; i.e. anything but [, /, \ or a newline.
(defconst c-awk-escaped-newlines*-re "\\(\\\\[\n\r]\\)*")
;;   Matches a (possibly empty) sequence of escaped newlines.
(defconst c-awk-regexp-char-class-re
  (concat "\\[" c-awk-escaped-newlines*-re "^?" c-awk-escaped-newlines*-re "]?"
          "\\(" c-awk-esc-pair-re "\\|" "[^]\n\r]" "\\)*" "\\(]\\|$\\)"))
;;   Matches a regexp char class, up to (but not including) EOL if the ] is
;;   missing.  An optional ^ and an optional literal ] are accepted directly
;;   after the opening [.
(defconst c-awk-regexp-innards-re
  (concat "\\(" c-awk-esc-pair-re "\\|" c-awk-regexp-char-class-re
          "\\|" c-awk-regexp-normal-re "\\)*"))
;;   Matches the inside of an AWK regexp (i.e. without the enclosing /s)
(defconst c-awk-regexp-without-end-re
  (concat "/" c-awk-regexp-innards-re))
;; Matches an AWK regexp up to, but not including, any terminating /. 

;; REGEXPS used for scanning an AWK buffer in order to decide IF A '/' IS A
;; REGEXP OPENER OR A DIVISION SIGN.  By "state" in the following is meant
;; whether a '/' at the current position would be a regexp opener or a
;; division sign.
(defconst c-awk-neutral-re
;  "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)+") ; changed, 2003/6/7
  "\\([{}@` \t]\\|\\+\\+\\|--\\|\\\\.\\)")
;;   A "neutral" char(pair).  Doesn't change the "state" of a subsequent /.
;; This is space/tab, braces, an auto-increment/decrement operator or an
;; escaped character.  Or one of the (illegal) characters @ or `.  But NOT an
;; end of line (even if escaped).
(defconst c-awk-neutrals*-re
  (concat "\\(" c-awk-neutral-re "\\)*"))
;;   A (possibly empty) string of neutral characters (or character pairs).
(defconst c-awk-var-num-ket-re "[]\)0-9a-zA-Z_$.\x80-\xff]+")
;;   Matches one or more chars, each of which is a constituent of a variable
;; or number, or a ket (i.e. closing bracKET), round or square.  Assume that
;; all characters \x80 to \xff are "letters".
(defconst c-awk-div-sign-re
  (concat c-awk-var-num-ket-re c-awk-neutrals*-re "/"))
;;   Will match a piece of AWK buffer ending in / which is a division sign, in
;; a context where an immediate / would be a regexp bracket.  It follows a
;; variable or number (with optional intervening "neutral" characters).  This
;; will only work when there won't be a preceding " or / before the sought /
;; to foul things up.
(defconst c-awk-non-arith-op-bra-re
  "[[\(&=:!><,?;'~|]")
;;   Matches an opening BRAcket, round or square, or any operator character
;; apart from +,-,/,*,%.  For the purpose at hand (detecting a / which is a
;; regexp bracket) these arith ops are unnecessary and a pain, because of "++"
;; and "--".
(defconst c-awk-regexp-sign-re
  (concat c-awk-non-arith-op-bra-re c-awk-neutrals*-re "/"))
;;   Will match a piece of AWK buffer ending in / which is an opening regexp
;; bracket, in a context where an immediate / would be a division sign.  This
;; will only work when there won't be a preceding " or / before the sought /
;; to foul things up.

;; ACM, 2002/02/15: The idea of the next function is to put the "Error font"
;; on strings/regexps which are missing their closing delimiter.
;; 2002/4/28.  The default syntax for / has been changed from "string" to
;; "punctuation", to reduce hassle when this character appears within a string
;; or comment.

(defun c-awk-set-string-regexp-syntax-table-properties (beg end)
  ;; BEG and END bracket a (possibly unterminated) string or regexp: the
  ;; opening delimiter is the character after BEG, and the closing delimiter,
  ;; IF ANY, is the character after END.  Put the appropriate syntax-table
  ;; properties on the delimiters and contents of this string/regexp.
  ;;
  ;; "String" here can also mean a gawk 3.1 "localizable" string which starts
  ;; with _".  In this case, we step over the _ and ignore it; it will get its
  ;; font from an entry in awk-font-lock-keywords.
  ;;
  ;; If the closing delimiter is missing (i.e., there is an EOL there) set the
  ;; STRING-FENCE property on the opening " or / and closing EOL.
  (when (eq (char-after beg) ?_)
    (setq beg (1+ beg)))

  (let ((opener (char-after beg))
        (after-opener (1+ beg)))
    ;; First the delimiters themselves.
    (cond
     ;; String/regexp terminated by EOB: only the opener can be marked.
     ((eq end (point-max))
      (put-text-property beg after-opener 'syntax-table '(15))) ; (15) = "string fence"
     ;; Missing end delimiter: fence both the opener and whatever is at END.
     ((/= opener (char-after end))
      (put-text-property beg after-opener 'syntax-table '(15))
      (put-text-property end (1+ end) 'syntax-table '(15)))
     ;; Properly bracketed regexp.
     ((eq opener ?/)
      (put-text-property beg after-opener 'syntax-table '(7)) ; (7) = "string"
      (put-text-property end (1+ end) 'syntax-table '(7)))
     ;; Properly bracketed string: Nothing to do.
     (t))
    ;; Now give any "s lying between the delimiters punctuation syntax.
    (save-excursion
      (goto-char after-opener)
      (if (eobp)
          t
        (while (search-forward "\"" end t)
          (put-text-property (1- (point)) (point) 'syntax-table '(1)))))))

(defun c-awk-syntax-tablify-string ()
  ;; Point is at the opening " (or at the _ of an opening _") of a string.
  ;; Put the syntax-table properties on the string and leave point just after
  ;; it: after the closing ", or after the EOL ending an unterminated string,
  ;; or at EOB.
  ;;
  ;; Return t if a / immediately after the string would be a division sign
  ;; (the string was properly closed; in AWK, ("15" / 5) gives 3 ;-), nil if
  ;; it would be a regexp opener (the string was unterminated).
  (search-forward-regexp c-awk-string-without-end-here-re nil t) ; a (possibly unterminated) string
  (c-awk-set-string-regexp-syntax-table-properties
   (match-beginning 0) (match-end 0))
  (let ((closed (looking-at "\"")))
    ;; Step over the closing ", or else over the EOL which cut the string
    ;; short.  At EOB there is nothing to step over.
    (if (or closed (looking-at "[\n\r]"))
        (forward-char))
    closed))

(defun c-awk-syntax-tablify-/ (anchor anchor-state-/div)
  ;; Point is at a /.  Decide whether it is a division sign or a regexp
  ;; opener; in the latter case put syntax-table properties on the whole
  ;; regexp.  Point is left just after the division sign or the regexp.
  ;;
  ;; ANCHOR is an earlier position, and ANCHOR-STATE-/DIV says what a / at
  ;; ANCHOR would have been: t for a division sign, nil for a regexp opener.
  ;; The text from ANCHOR up to the / is analysed to classify the /.
  ;;
  ;; The return value is the equivalent of ANCHOR-STATE-/DIV for the position
  ;; where point is left: t after a properly terminated regexp, nil otherwise.
  (let* ((slash-pos (point))
         (past-slash (1+ slash-pos))
         (division-p
          (progn
            (goto-char anchor)
            (if anchor-state-/div
                ;; Division unless something earlier turns the / into a
                ;; regexp opener.
                (not (search-forward-regexp c-awk-regexp-sign-re past-slash t))
              ;; Regexp unless something earlier turns the / into a division
              ;; sign.
              (search-forward-regexp c-awk-div-sign-re past-slash t)))))
    (cond
     (division-p
      (goto-char past-slash)
      nil)
     (t
      ;; A regexp opener.  Jump over the regexp innards, setting the match
      ;; data.
      (goto-char slash-pos)
      (search-forward-regexp c-awk-regexp-without-end-re)
      (c-awk-set-string-regexp-syntax-table-properties
       (match-beginning 0) (match-end 0))
      (let ((terminated (looking-at "/")))
        ;; Step over the terminating /, or over the EOL which ends an
        ;; incomplete regexp (a / on the next line would then start another
        ;; regexp).  At EOB there is nothing to step over.
        (if (or terminated (looking-at "[\n\r]"))
            (forward-char))
        terminated)))))

(defun c-awk-set-syntax-table-properties (lim)
;;     Scan the buffer text from the start of the logical line at point up to
;; LIM, setting (and clearing) the syntax-table property where necessary.
;;
;; This function is designed to be called as the FUNCTION in a MATCHER in
;; font-lock-syntactic-keywords, and it always returns NIL (to inhibit
;; repeated calls from font-lock: See elisp info page "Search-based
;; Fontification").  It also gets called, with a bit of glue, from
;; after-change-functions when font-lock isn't active.  Point is left
;; "undefined" after this function exits.  THE BUFFER SHOULD HAVE BEEN
;; WIDENED, AND ANY PRECIOUS MATCH-DATA SAVED BEFORE CALLING THIS ROUTINE.
;;
;; The syntax-table property is set/cleared on:
;; (i) / - "string" on a / which opens or closes a properly terminated
;;     regexp; a division sign is left without the property.
;; (ii) the opener of an unterminated string/regexp - "generic string
;;     delimiter" goes on both the opening " or / and the end of the line
;;     where the closing delimiter is missing.
;; (iii) "s inside strings/regexps (these will all be escaped "s) -
;;     "punctuation".  This later allows other routines to use the regexp
;;     "\\S\"*" to skip over the string innards.
;; (iv) Everything else, including comments - the property is cleared.
  (let ((div-next nil)        ; t means a following / would be a div sign.
        scan-start)
    (c-awk-beginning-of-logical-line) ; ACM 2002/7/21.  This is probably redundant.
    ;; Start from a clean slate.
    (put-text-property (point) lim 'syntax-table nil)
    (search-forward-regexp c-awk-harmless-lines+-here-re nil t) ; skip harmless lines.

    ;; Each iteration deals with one string, regexp, or division sign.
    (while (< (point) lim)
      (setq scan-start (point))
      (search-forward-regexp c-awk-harmless-string*-here-re nil t)
      ;; We are now looking at either a " (possibly preceded by _) or a /.
      (setq div-next
            (cond
             ((looking-at "_?\"")
              (c-awk-syntax-tablify-string))
             (t
              (c-awk-syntax-tablify-/ scan-start div-next))))

      ;; Skip any further "harmless" lines before the next tricky one.  Having
      ;; skipped some, a / would start a regexp again.
      (when (search-forward-regexp c-awk-harmless-lines+-here-re nil t)
        (setq div-next nil)))
    nil))


;; ACM, 2002/07/21: Thoughts: We need an AWK Mode after-change function to set
;; the syntax-table properties even when font-lock isn't enabled, for the
;; subsequent use of movement functions, etc.  However, it seems that if font
;; lock _is_ enabled, we can always leave it to do the job.
(defvar c-awk-old-EOLL 0)
(make-variable-buffer-local 'c-awk-old-EOLL)
;; End of the logical line following the region which is about to be changed,
;; as a buffer position valid BEFORE the change.  Set in
;; `c-awk-before-change' and read in `c-awk-end-of-change-region', which
;; adjusts it for the size of the change.

(defun c-awk-before-change (beg end)
;; This function is called exclusively from the before-change-functions hook.
;; It does two things: it records in `c-awk-old-EOLL' the end of the (logical)
;; line on which END lies, and it clears the c-awk-NL-prop text properties
;; from END to the end of the buffer.  BEG is not used.
  (save-restriction
    (save-excursion
      (let ((eoll (c-awk-end-of-logical-line end)))
        (setq c-awk-old-EOLL eoll)
        (c-save-buffer-state nil
          (c-awk-clear-NL-props end (point-max)))))))

(defun c-awk-end-of-change-region (beg end old-len)
  ;; Return the end of the region which needs to be font-locked after a
  ;; change.  BEG, END and OLD-LEN are as passed to an after-change function.
  ;; The result is the later of:
  ;;   - the end of the logical line as it was before the change
  ;;     (`c-awk-old-EOLL'), shifted by the amount the buffer grew or shrank;
  ;;   - the end of the logical line on which END now lies.
  ;; N.B. point is left undefined.
  (let* ((new-len (- end beg))
         (old-eoll-now (+ (- c-awk-old-EOLL old-len) new-len))
         (new-eoll (c-awk-end-of-logical-line end)))
    (max old-eoll-now new-eoll)))

(defun c-awk-after-change (beg end old-len)
;; This function is called exclusively as an after-change function in
;; AWK Mode.  It ensures that the syntax-table properties get set in the
;; changed region.  However, if font-lock is enabled, this function does
;; nothing, since an enabled font-lock after-change function will always do
;; this.
;;
;; BEG, END and OLD-LEN are the standard after-change arguments.  The region
;; processed runs from the beginning of the logical line containing BEG to
;; the position given by `c-awk-end-of-change-region'.
;;
;; `c-awk-set-syntax-table-properties' requires the buffer to be widened and
;; any precious match data to be saved by its caller, so both are done here.
;; The command which made the change may well still need its match data, and
;; a string or regexp may extend beyond the current narrowing.
  (unless (and (boundp 'font-lock-mode) font-lock-mode)
    (save-restriction
      (save-excursion
        (save-match-data
          (widen)
          (setq end (c-awk-end-of-change-region beg end old-len))
          (c-awk-beginning-of-logical-line beg)
          (c-save-buffer-state nil ; So that read-only status isn't affected.
                                        ; (e.g. when first loading the buffer)
            (c-awk-set-syntax-table-properties end)))))))

;; ACM 2002/5/25.  When font-locking is invoked by a buffer change, the region
;; specified by the font-lock after-change function must be expanded to
;; include ALL of any string or regexp within the region.  The simplest way to
;; do this in practice is to use the beginning/end-of-logical-line functions.
;; Don't overlook the possibility of the buffer change being the "recapturing"
;; of a previously escaped newline.
(defmacro c-awk-advise-fl-for-awk-region (function)
;; Expand to a `before' advice called get-awk-region on FUNCTION (an
;; unevaluated symbol).  FUNCTION is expected to take the after-change
;; arguments BEG, END and OLD-LEN as its arguments 0, 1 and 2.
  `(defadvice ,function (before get-awk-region activate)
;; When font-locking an AWK Mode buffer, make sure that any string/regexp is
;; completely font-locked, by extending the region to whole logical lines.
;; Other major modes are left untouched.
;;
;; N.B. The order matters: the new END (arg 1) is calculated from the
;; ORIGINAL BEG (arg 0), so arg 1 must be set before arg 0 is overwritten.
  (when (eq major-mode 'awk-mode)
    (save-excursion
      (ad-set-arg 1 (c-awk-end-of-change-region
                     (ad-get-arg 0)     ; beg
                     (ad-get-arg 1)     ; end
                     (ad-get-arg 2)))   ; old-len
      ;; The new BEG is whatever `c-awk-beginning-of-logical-line' returns
      ;; (presumably the position of the start of that logical line).
      (ad-set-arg 0 (c-awk-beginning-of-logical-line (ad-get-arg 0)))))))

;; Install the get-awk-region advice on each of the after-change functions
;; (of font-lock, jit-lock and lazy-lock, going by their names) which might
;; be asked to refontify a changed region.
(c-awk-advise-fl-for-awk-region font-lock-after-change-function)
(c-awk-advise-fl-for-awk-region jit-lock-after-change)
(c-awk-advise-fl-for-awk-region lazy-lock-defer-rest-after-change)
(c-awk-advise-fl-for-awk-region lazy-lock-defer-line-after-change)

;; ACM 2002/9/29.  Functions for C-M-a and C-M-e

(defconst c-awk-terminated-regexp-or-string-here-re "\\=\\s\"\\S\"*\\s\"")
;; Matches, at point only, a terminated string/regexp (utilising syntax-table
;; properties): a character with "string" syntax, any number of characters
;; without it, then another character with "string" syntax.

(defconst c-awk-unterminated-regexp-or-string-here-re "\\=\\s|\\S|*$")
;; Matches, at point only, an unterminated string/regexp, NOT including the
;; eol at the end: a character with "generic string delimiter" syntax
;; followed by characters without it, up to the end of the line.

(defconst c-awk-harmless-pattern-characters*
  (concat "\\([^{;#/\"\\\\\n\r]\\|" c-awk-esc-pair-re "\\)*"))
;; Matches any number (possibly zero) of "harmless" characters in a pattern
;; or escaped character pairs.  A "harmless" character is anything other than
;; {, ;, #, /, ", \, or a line terminator.

(defun c-awk-beginning-of-defun (&optional arg)
  "Move backward to the beginning of an AWK \"defun\".  With ARG, do it that
many times.  Negative arg -N means move forward to Nth following beginning of
defun.  Returns t unless search stops due to beginning or end of buffer.

By a \"defun\" is meant either a pattern-action pair or a function.  The start
of a defun is recognised as code starting at column zero which is neither a
closing brace nor a comment nor a continuation of the previous line.  Unlike
in some other modes, having an opening brace at column 0 is neither necessary
nor helpful."
  (interactive "p")
  ;; ARG is optional: without this default, a call from Lisp with no argument
  ;; would signal an error in the arithmetic comparisons below.
  (or arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state                ; ensures the buffer is writable.
     nil
     (let ((found t))     ; Has the most recent regexp search found b-of-defun?
       (if (>= arg 0)
           ;; Go back one defun each time round the following loop. (For +ve arg)
           (while (and found (> arg 0) (not (eq (point) (point-min))))
             ;; Go back one "candidate" each time round the next loop until one
             ;; is genuinely a beginning-of-defun, i.e. the NL-prop of the line
             ;; before it is one of ?\; or ?\#.  If no candidate is left, the
             ;; search leaves point at the limit and FOUND becomes nil.
             (while (and (setq found (search-backward-regexp
                                      "^[^#} \t\n\r]" (point-min) 'stop-at-limit))
                         (not (memq (c-awk-get-NL-prop-prev-line) '(?\; ?\#)))))
             (setq arg (1- arg)))
         ;; The same for a -ve arg.  First step off the current position, so
         ;; that a defun beginning at point isn't found again.
         (if (not (eq (point) (point-max))) (forward-char 1))
         (while (and found (< arg 0) (not (eq (point) (point-max)))) ; The same for -ve arg.
           (while (and (setq found (search-forward-regexp
                                    "^[^#} \t\n\r]" (point-max) 'stop-at-limit))
                       (not (memq (c-awk-get-NL-prop-prev-line) '(?\; ?\#)))))
           (setq arg (1+ arg)))
         ;; A forward search leaves point after the match; go to its start.
         (if found (goto-char (match-beginning 0))))
       ;; ARG has been counted to zero exactly when every move succeeded.
       (eq arg 0)))))

(defun c-awk-forward-awk-pattern ()
  ;; Point is at the start of an AWK pattern (which may be null) or function
  ;; declaration.  Move to the pattern's end, and past any trailing space or
  ;; comment.  Typically, we stop at the { which denotes the corresponding AWK
  ;; action/function body.  Otherwise we stop at the EOL (or ;) marking the
  ;; absence of an explicit action.
  (let ((keep-going t))
    (while keep-going
      ;; Skip the boring stuff, and any comment which follows it.
      (search-forward-regexp c-awk-harmless-pattern-characters*)
      (when (looking-at "#")
        (end-of-line))
      ;; Decide what stopped us.  A non-nil value means "carry on scanning".
      (setq keep-going
            (cond
             ((eobp) nil)
             ((looking-at "[{;]") nil)  ; We've finished!
             ((eolp)
              ;; Carry on only if the line is continued onto the next one.
              ;; N.B. `forward-line' returns non-nil.
              (and (c-awk-cur-line-incomplete-p)
                   (forward-line)))
             ;; Step over a string or regexp, terminated or not.
             ((search-forward-regexp c-awk-terminated-regexp-or-string-here-re nil t))
             ((search-forward-regexp c-awk-unterminated-regexp-or-string-here-re nil t))
             ;; Anything else which is a / is a division sign.
             ((looking-at "/") (forward-char) t))))))

(defun c-awk-end-of-defun1 ()
  ;; Point is at the start of a "defun".  Move to its end and return that
  ;; position.  The end is just after the brace block which follows the
  ;; pattern, or just after a terminating ;, or at the EOL where the pattern
  ;; stops.  Signal an error if `c-awk-forward-awk-pattern' stopped anywhere
  ;; else.
  (c-awk-forward-awk-pattern)
  (if (looking-at "{")
      (goto-char (scan-sexps (point) 1))
    (if (looking-at ";")
        (forward-char)
      (if (not (eolp))
          (error "c-awk-end-of-defun1:  Failure of c-awk-forward-awk-pattern"))))
  (point))

(defun c-awk-beginning-of-defun-p ()
  ;; Are we already at the beginning of a defun?  I.e., is point at column 0,
  ;; at a character which is none of #, }, space, tab or a line terminator,
  ;; on a line which isn't a continuation line of any sort?
  (if (looking-at "^[^#} \t\n\r]")
      (not (c-awk-prev-line-incomplete-p))))

(defun c-awk-end-of-defun (&optional arg)
  "Move forward to next end of defun.  With argument, do it that many times.
Negative argument -N means move back to Nth preceding end of defun.

An end of a defun occurs right after the closing brace that matches the
opening brace at its start, or immediately after the AWK pattern when there is
no explicit action; see function `c-awk-beginning-of-defun'."
  (interactive "p")
  (or arg (setq arg 1))
  (save-match-data
    (c-save-buffer-state
     nil
     ;; START-POINT is where we began.  END-POINT is used only for a -ve ARG:
     ;; it holds the most recently found end-of-defun.
     (let ((start-point (point)) end-point)
       ;; Strategy: (For +ve ARG): If we're not already at a beginning-of-defun,
       ;; move backwards to one.
       ;; Repeat [(i) move forward to end-of-current-defun (see below);
       ;;         (ii) If this isn't it, move forward to beginning-of-defun].
       ;; We start counting ARG only when step (i) has passed the original point.
       (when (> arg 0)
         ;; Try to move back to a beginning-of-defun, if not already at one.
         (if (not (c-awk-beginning-of-defun-p))
             (when (not (c-awk-beginning-of-defun 1)) ; No bo-defun before point.
               (goto-char start-point)
               (c-awk-beginning-of-defun -1))) ; if this fails, we're at EOB, tough!
         ;; Now count forward, one defun at a time.  N.B. the `and' below is
         ;; evaluated for its side effects, in order: `c-awk-end-of-defun1'
         ;; moves point and returns a position (non-nil); the `if' counts down
         ;; ARG only when that end lies after START-POINT, and is always
         ;; non-nil (in Lisp, even the number 0 is non-nil); we stop here when
         ;; ARG reaches 0, otherwise we move on to the next beginning-of-defun.
         (while (and (not (eobp))
                     (c-awk-end-of-defun1)
                     (if (> (point) start-point) (setq arg (1- arg)) t)
                     (> arg 0)
                     (c-awk-beginning-of-defun -1))))

       ;; Strategy: (For -ve ARG): Repeatedly move back to a
       ;; beginning-of-defun and find the end of that defun (without moving
       ;; there).  Count up ARG each time this end lies before START-POINT.
       (when (< arg 0)
         (setq end-point start-point)
         (while (and (not (bobp))
                     (c-awk-beginning-of-defun 1)
                     ;; At BOB, point itself serves as the "end".
                     (if (< (setq end-point (if (bobp) (point)
                                              (save-excursion (c-awk-end-of-defun1))))
                            start-point)
                         (setq arg (1+ arg)) t)
                     (< arg 0)))
         ;; Never finish after the position we started from.
         (goto-char (min start-point end-point)))))))

(cc-provide 'cc-awk)			; Changed from 'awk-mode, ACM 2002/5/21

;;; arch-tag: c4836289-3aa4-4a59-9934-9ccc2bacccf3
;;; cc-awk.el ends here