;;; Scheme reader ;;; Copyright (C) 1995-1997,1999-2001,2003-2004,2006-2012,2014-2021 ;;; Free Software Foundation, Inc. ;;; ;;; This library is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU Lesser General Public License as ;;; published by the Free Software Foundation, either version 3 of the ;;; License, or (at your option) any later version. ;;; ;;; This library is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Lesser General Public License for more details. ;;; ;;; You should have received a copy of the GNU Lesser General Public ;;; License along with this program. If not, see ;;; . ;;; Commentary: ;;; ;;; Implementation of Scheme's "read". ;;; ;;; Code: ;; While porting read.c to Scheme, I found these expressions that result ;; in undesirable behavior in the C reader. Most all of them are also ;; present in the Scheme reader. Probably I should fix all of them, but ;; I would first like to prove that the Scheme reader is good enough. ;; ;; (call-with-input-string "," read) ;; (read-disable 'square-brackets), then (call-with-input-string "]" read) ;; (call-with-input-string "(#tru1)" read) => '(#t ru1) ;; (call-with-input-string "(#true1)" read) => '(#t 1) ;; (call-with-input-string "(#fAlse)" read) => '(#f Alse) ;; (call-with-input-string "(#f1 #f2 #f3)" read) => error reading array ;; #: foo ;; #:#|what|#foo ;; #@-(1 2 3) => #(1 2 3) ;; (#*10101010102) => (#*1010101010 2) (define-syntax let*-values (syntax-rules () ((_ () . body) (let () . body)) ((_ ((vars expr) . binds) . body) (call-with-values (lambda () expr) (lambda vars (let*-values binds . body)))))) (define bitfield:record-positions? 0) (define bitfield:case-insensitive? 2) (define bitfield:keyword-style 4) (define bitfield:r6rs-escapes? 6) (define bitfield:square-brackets? 8) (define bitfield:hungry-eol-escapes? 10) (define bitfield:curly-infix? 12) (define bitfield:r7rs-symbols? 14) (define read-option-bits 16) (define read-option-mask #b11) (define read-option-inherit #b11) (define read-options-inherit-all (1- (ash 1 read-option-bits))) (define keyword-style-hash-prefix 0) (define keyword-style-prefix 1) (define keyword-style-postfix 2) (define (compute-reader-options port) (let ((options (read-options)) (port-options (or (%port-property port 'port-read-options) read-options-inherit-all))) (define-syntax-rule (option field exp) (let ((port-option (logand port-options (ash read-option-mask field)))) (if (= port-option (ash read-option-inherit field)) exp port-option))) (define (bool key field) (option field (if (memq key options) (ash 1 field) 0))) (define (enum key values field) (option field (ash (assq-ref values (and=> (memq key options) cadr)) field))) (logior (bool 'positions bitfield:record-positions?) (bool 'case-insensitive bitfield:case-insensitive?) (enum 'keywords '((#f . 0) (prefix . 1) (postfix . 2)) bitfield:keyword-style) (bool 'r6rs-hex-escapes bitfield:r6rs-escapes?) (bool 'square-brackets bitfield:square-brackets?) (bool 'hungry-eol-escapes bitfield:hungry-eol-escapes?) (bool 'curly-infix bitfield:curly-infix?) (bool 'r7rs-symbols bitfield:r7rs-symbols?)))) (define (set-option options field new) (logior (ash new field) (logand options (lognot (ash #b11 field))))) (define (set-port-read-option! port field value) (%set-port-property! port 'port-read-options (set-option (or (%port-property port 'port-read-options) read-options-inherit-all) field value))) (define (%read port annotate strip-annotation) ;; init read options (define opts (compute-reader-options port)) (define (enabled? field) (not (zero? (logand (ash 1 field) opts)))) (define (set-reader-option! field value) (set! opts (set-option opts field value)) (set-port-read-option! port field value)) (define (case-insensitive?) (enabled? bitfield:case-insensitive?)) (define (keyword-style) (logand read-option-mask (ash opts (- bitfield:keyword-style)))) (define (r6rs-escapes?) (enabled? bitfield:r6rs-escapes?)) (define (square-brackets?) (enabled? bitfield:square-brackets?)) (define (hungry-eol-escapes?) (enabled? bitfield:hungry-eol-escapes?)) (define (curly-infix?) (enabled? bitfield:curly-infix?)) (define (r7rs-symbols?) (enabled? bitfield:r7rs-symbols?)) (define neoteric 0) (define (next) (read-char port)) (define (peek) (peek-char port)) (define filename (port-filename port)) (define (get-pos) (cons (port-line port) (port-column port))) ;; We are only ever interested in whether an object is a char or not. (define (eof-object? x) (not (char? x))) (define (input-error msg args) (scm-error 'read-error #f (format #f "~A:~S:~S: ~A" (or filename "#") (1+ (port-line port)) (1+ (port-column port)) msg) args #f)) (define-syntax-rule (error msg arg ...) (let ((args (list arg ...))) (input-error msg args))) (define (read-semicolon-comment) (let ((ch (next))) (cond ((eof-object? ch) ch) ((eqv? ch #\newline) (next)) (else (read-semicolon-comment))))) (define-syntax-rule (take-until first pred) (let lp ((out (list first))) (let ((ch (peek))) (if (or (eof-object? ch) (pred ch)) (reverse-list->string out) (begin (next) (lp (cons ch out))))))) (define-syntax-rule (take-while first pred) (take-until first (lambda (ch) (not (pred ch))))) (define (delimiter? ch) (case ch ((#\( #\) #\; #\" #\space #\return #\ff #\newline #\tab) #t) ((#\[ #\]) (or (square-brackets?) (curly-infix?))) ((#\{ #\}) (curly-infix?)) (else #f))) (define (read-token ch) (take-until ch delimiter?)) (define (read-mixed-case-symbol ch) (let* ((str (read-token ch)) (len (string-length str))) (cond ((and (eq? (keyword-style) keyword-style-postfix) (> len 1) (eqv? #\: (string-ref str (1- len)))) (let ((str (substring str 0 (1- len)))) (symbol->keyword (string->symbol (if (case-insensitive?) (string-downcase str) str))))) (else (string->symbol (if (case-insensitive?) (string-downcase str) str)))))) (define (read-parenthesized rdelim) (define (finish-curly-infix ret) ;; Perform syntactic transformations on {...} lists. (define (extract-infix-list ls) (and (pair? ls) (let ((x (car ls)) (ls (cdr ls))) (and (pair? ls) (let ((op (car ls)) (ls (cdr ls))) (if (and (pair? ls) (null? (cdr ls))) (cons* op x ls) (let ((tail (extract-infix-list ls))) (and tail (equal? (strip-annotation op) (strip-annotation (car tail))) (cons* op x (cdr tail)))))))))) (cond ((not (eqv? rdelim #\})) ret) ; Only on {...} lists. ((not (pair? ret)) ret) ; {} => (); {.x} => x ((null? (cdr ret)) (car ret)); {x} => x ((and (pair? (cdr ret)) (null? (cddr ret))) ret) ; {x y} => (x y) ((extract-infix-list ret)) ; {x + y + ... + z} => (+ x y ... z) (else (cons '$nfx$ ret)))) ; {x y . z} => ($nfx$ x y . z) (define curly? (eqv? rdelim #\})) (finish-curly-infix (let lp ((ch (next-non-whitespace))) (when (eof-object? ch) (error "unexpected end of input while searching for: ~A" rdelim)) (cond ((eqv? ch rdelim) '()) ((or (eqv? ch #\)) (and (eqv? ch #\]) (or (square-brackets?) (curly-infix?))) (and (eqv? ch #\}) (curly-infix?))) (error "mismatched close paren: ~A" ch)) (else (let ((expr (read-expr ch))) ;; Note that it is possible for scm_read_expression to ;; return `.', but not as part of a dotted pair: as in ;; #{.}#. Indeed an example is here! (if (and (eqv? ch #\.) (eq? (strip-annotation expr) '#{.}#)) (let* ((tail (read-subexpression "tail of improper list")) (close (next-non-whitespace))) (unless (eqv? close rdelim) (error "missing close paren: ~A" close)) tail) (cons expr (lp (next-non-whitespace)))))))))) (define (hex-digit ch) (case ch ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) (- (char->integer ch) (char->integer #\0))) ((#\a #\b #\c #\d #\e #\f) (+ 10 (- (char->integer ch) (char->integer #\a)))) ((#\A #\B #\C #\D #\E #\F) (+ 10 (- (char->integer ch) (char->integer #\A)))) (else #f))) (define (read-r6rs-hex-escape) (let ((ch (next))) (cond ((hex-digit ch) => (lambda (res) (let lp ((res res)) (let ((ch (next))) (cond ((hex-digit ch) => (lambda (digit) (lp (+ (* 16 res) digit)))) ((eqv? ch #\;) (integer->char res)) ((eof-object? ch) (error "unexpected end of input in character escape sequence")) (else (error "invalid character in escape sequence: ~S" ch))))))) ((eof-object? ch) (error "unexpected end of input in character escape sequence")) (else (error "invalid character in escape sequence: ~S" ch))))) (define (read-fixed-hex-escape len) (let lp ((len len) (res 0)) (if (zero? len) (integer->char res) (let ((ch (next))) (cond ((hex-digit ch) => (lambda (digit) (lp (1- len) (+ (* res 16) digit)))) ((eof-object? ch) (error "unexpected end of input in character escape sequence")) (else (error "invalid character in escape sequence: ~S" ch))))))) (define (read-string rdelim) (let lp ((out '())) (let ((ch (next))) (cond ((eof-object? ch) (error "unexpected end of input while reading string")) ((eqv? ch rdelim) (reverse-list->string out)) ((eqv? ch #\\) (let ((ch (next))) (when (eof-object? ch) (error "unexpected end of input while reading string")) (cond ((eqv? ch #\newline) (when (hungry-eol-escapes?) ;; Skip intraline whitespace before continuing. (let skip () (let ((ch (peek))) (when (and (not (eof-object? ch)) (or (eqv? ch #\tab) (eq? (char-general-category ch) 'Zs))) (next) (skip))))) (lp out)) ((eqv? ch rdelim) (lp (cons rdelim out))) (else (lp (cons (case ch ;; Accept "\(" for use at the beginning of ;; lines in multiline strings to avoid ;; confusing emacs lisp modes. ((#\| #\\ #\() ch) ((#\0) #\nul) ((#\f) #\ff) ((#\n) #\newline) ((#\r) #\return) ((#\t) #\tab) ((#\a) #\alarm) ((#\v) #\vtab) ((#\b) #\backspace) ((#\x) (if (or (r6rs-escapes?) (eqv? rdelim #\|)) (read-r6rs-hex-escape) (read-fixed-hex-escape 2))) ((#\u) (read-fixed-hex-escape 4)) ((#\U) (read-fixed-hex-escape 6)) (else (error "invalid character in escape sequence: ~S" ch))) out)))))) (else (lp (cons ch out))))))) (define (read-character) (let ((ch (next))) (cond ((eof-object? ch) (error "unexpected end of input after #\\")) ((delimiter? ch) ch) (else (let* ((tok (read-token ch)) (len (string-length tok))) (define dotted-circle #\x25cc) (define r5rs-charnames '(("space" . #\x20) ("newline" . #\x0a))) (define r6rs-charnames '(("nul" . #\x00) ("alarm" . #\x07) ("backspace" . #\x08) ("tab" . #\x09) ("linefeed" . #\x0a) ("vtab" . #\x0b) ("page" . #\x0c) ("return" . #\x0d) ("esc" . #\x1b) ("delete" . #\x7f))) (define r7rs-charnames '(("escape" . #\x1b))) (define C0-control-charnames '(("nul" . #\x00) ("soh" . #\x01) ("stx" . #\x02) ("etx" . #\x03) ("eot" . #\x04) ("enq" . #\x05) ("ack" . #\x06) ("bel" . #\x07) ("bs" . #\x08) ("ht" . #\x09) ("lf" . #\x0a) ("vt" . #\x0b) ("ff" . #\x0c) ("cr" . #\x0d) ("so" . #\x0e) ("si" . #\x0f) ("dle" . #\x10) ("dc1" . #\x11) ("dc2" . #\x12) ("dc3" . #\x13) ("dc4" . #\x14) ("nak" . #\x15) ("syn" . #\x16) ("etb" . #\x17) ("can" . #\x18) ("em" . #\x19) ("sub" . #\x1a) ("esc" . #\x1b) ("fs" . #\x1c) ("gs" . #\x1d) ("rs" . #\x1e) ("us" . #\x1f) ("sp" . #\x20) ("del" . #\x7f))) (define alt-charnames '(("null" . #\x0) ("nl" . #\x0a) ("np" . #\x0c))) ;; Although R6RS and R7RS charnames specified as being ;; case-sensitive, Guile matches them case-insensitively, like ;; other char names. (define (named-char tok alist) (let lp ((alist alist)) (and (pair? alist) (if (string-ci=? tok (caar alist)) (cdar alist) (lp (cdr alist)))))) (cond ((= len 1) ch) ((and (= len 2) (eqv? (string-ref tok 1) dotted-circle)) ;; Ignore dotted circles, which may be used to keep ;; combining characters from combining with the backslash in ;; #\charname. ch) ((and (<= (char->integer #\0) (char->integer ch) (char->integer #\7)) (string->number tok 8)) ;; Specifying a codepoint as an octal value. => integer->char) ((and (eqv? ch #\x) (> len 1) (string->number (substring tok 1) 16)) ;; Specifying a codepoint as an hexadecimal value. Skip ;; initial "x". => integer->char) ((named-char tok r5rs-charnames)) ((named-char tok r6rs-charnames)) ((named-char tok r7rs-charnames)) ((named-char tok C0-control-charnames)) ((named-char tok alt-charnames)) (else (error "unknown character name ~a" tok)))))))) (define (read-vector) (list->vector (map strip-annotation (read-parenthesized #\))))) (define (read-srfi-4-vector ch) (read-array ch)) (define (maybe-read-boolean-tail tail) (let ((len (string-length tail))) (let lp ((i 0)) (or (= i len) (let ((ch (peek))) (and (not (eof-object? ch)) (eqv? (char-downcase ch) (string-ref tail i)) (or (begin (next) (lp (1+ i))) (begin (unread-char ch port) #f)))))))) (define (read-false-or-srfi-4-vector) (let ((ch (peek))) (if (or (eqv? ch #\3) (eqv? ch #\6)) (read-srfi-4-vector #\f) (begin (maybe-read-boolean-tail "alse") #f)))) (define (read-bytevector) (define (expect ch) (unless (eqv? (next) ch) (error "invalid bytevector prefix" ch))) (expect #\u) (expect #\8) (expect #\() (list->typed-array 'vu8 1 (map strip-annotation (read-parenthesized #\))))) ;; FIXME: We should require a terminating delimiter. (define (read-bitvector) (list->bitvector (let lp () (let ((ch (peek))) (case ch ((#\0) (next) (cons #f (lp))) ((#\1) (next) (cons #t (lp))) (else '())))))) (define (read-boolean ch) ;; Historically, Guile hasn't required a delimiter after #f / #t. ;; When the longer #false / #true forms were added, we kept this ;; behavior. It is terrible and we should change it!! (case ch ((#\t #\T) (maybe-read-boolean-tail "rue") #t) (else (maybe-read-boolean-tail "alse") #f))) (define (read-keyword) (let ((expr (strip-annotation (read-subexpression "keyword")))) (unless (symbol? expr) (error "keyword prefix #: not followed by a symbol: ~a" expr)) (symbol->keyword expr))) (define (read-array ch) (define (read-decimal-integer ch alt) ;; This parser has problems but it's what Guile's read.c does. Any ;; fix should come later and to both of them. (define (decimal-digit ch) (and (not (eof-object? ch)) (let ((digit (- (char->integer ch) (char->integer #\0)))) (and (<= 0 digit 9) digit)))) (let*-values (((sign ch) (if (eqv? ch #\-) (values -1 (next)) (values 1 ch)))) (let lp ((ch ch) (res #f)) (cond ((decimal-digit ch) => (lambda (digit) (lp (next) (if res (+ (* 10 res) digit) digit)))) (else (values ch (if res (* res sign) alt))))))) (define (read-rank ch) (let*-values (((ch rank) (read-decimal-integer ch 1))) (when (< rank 0) (error "array rank must be non-negative")) (when (eof-object? ch) (error "unexpected end of input while reading array")) (values ch rank))) (define (read-tag ch) (let lp ((ch ch) (chars '())) (when (eof-object? ch) (error "unexpected end of input while reading array")) (if (memv ch '(#\( #\@ #\:)) (values ch (if (null? chars) #t (string->symbol (list->string (reverse chars))))) (lp (next) (cons ch chars))))) (define (read-dimension ch) (let*-values (((ch lbnd) (if (eqv? ch #\@) (read-decimal-integer (next) 0) (values ch 0))) ((ch len) (if (eqv? ch #\:) (read-decimal-integer (next) 0) (values ch #f)))) (when (and len (< len 0)) (error "array length must be non-negative")) (when (eof-object? ch) (error "unexpected end of input while reading array")) (values ch (if len (list lbnd (+ lbnd (1- len))) lbnd)))) (define (read-shape ch alt) (if (memv ch '(#\@ #\:)) (let*-values (((ch head) (read-dimension ch)) ((ch tail) (read-shape ch '()))) (values ch (cons head tail))) (values ch alt))) (define (read-elements ch rank) (unless (eqv? ch #\() (error "missing '(' in vector or array literal")) (let ((elts (map strip-annotation (read-parenthesized #\))))) (if (zero? rank) (begin ;; Handle special print syntax of rank zero arrays; see ;; scm_i_print_array for a rationale. (when (null? elts) (error "too few elements in array literal, need 1")) (unless (null? (cdr elts)) (error "too many elements in array literal, need 1")) (car elts)) elts))) (let*-values (((ch rank) (read-rank ch)) ((ch tag) (read-tag ch)) ((ch shape) (read-shape ch rank)) ((elts) (read-elements ch rank))) (when (and (pair? shape) (not (eqv? (length shape) rank))) (error "the number of shape specifications must match the array rank")) (list->typed-array tag shape elts))) (define (read-number-and-radix ch) (let ((tok (string-append "#" (read-token ch)))) (or (string->number tok) (error "unknown # object: ~S" tok)))) (define (read-extended-symbol) (define (next-not-eof) (let ((ch (next))) (when (eof-object? ch) (error "end of input while reading symbol")) ch)) (string->symbol (list->string (let lp ((saw-brace? #f)) (let lp/inner ((ch (next-not-eof)) (saw-brace? saw-brace?)) (cond (saw-brace? (if (eqv? ch #\#) '() ;; Don't eat CH, see ;; . (cons #\} (lp/inner ch #f)))) ((eqv? ch #\}) (lp #t)) ((eqv? ch #\\) ;; It used to be that print.c would print extended-read-syntax ;; symbols with backslashes before "non-standard" chars, but ;; this routine wouldn't do anything with those escapes. ;; Bummer. What we've done is to change print.c to output ;; R6RS hex escapes for those characters, relying on the fact ;; that the extended read syntax would never put a `\' before ;; an `x'. For now, we just ignore other instances of ;; backslash in the string. (let* ((ch (next-not-eof)) (ch (if (eqv? ch #\x) (read-r6rs-hex-escape) ch))) (cons ch (lp #f)))) (else (cons ch (lp #f))))))))) (define (read-nil) ;; Have already read "#\n" -- now read "il". (let ((id (read-mixed-case-symbol #\n))) (unless (eq? id 'nil) (error "unexpected input while reading #nil: ~a" id)) #nil)) (define (read-sharp) (let* ((ch (next))) (cond ((eof-object? ch) (error "unexpected end of input after #")) ((read-hash-procedure ch) => (lambda (proc) (proc ch port))) (else (case ch ((#\\) (read-character)) ((#\() (read-vector)) ((#\s #\u #\c) (read-srfi-4-vector ch)) ((#\f) (read-false-or-srfi-4-vector)) ((#\v) (read-bytevector)) ((#\*) (read-bitvector)) ((#\t #\T #\F) (read-boolean ch)) ((#\:) (read-keyword)) ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\@) (read-array ch)) ((#\i #\e #\b #\B #\o #\O #\d #\D #\x #\X #\I #\E) (read-number-and-radix ch)) ((#\{) (read-extended-symbol)) ((#\') (list 'syntax (read-subexpression "syntax expression"))) ((#\`) (list 'quasisyntax (read-subexpression "quasisyntax expression"))) ((#\,) (if (eqv? #\@ (peek)) (begin (next) (list 'unsyntax-splicing (read-subexpression "unsyntax-splicing expression"))) (list 'unsyntax (read-subexpression "unsyntax expression")))) ((#\n) (read-nil)) (else (error "Unknown # object: ~S" (string #\# ch)))))))) (define (read-number ch) (let* ((str (read-token ch))) (or (string->number str) (string->symbol (if (case-insensitive?) (string-downcase str) str))))) (define (read-expr* ch) (case ch ((#\{) (cond ((curly-infix?) (set! neoteric (1+ neoteric)) (let ((expr (read-parenthesized #\}))) (set! neoteric (1- neoteric)) expr)) (else (read-mixed-case-symbol ch)))) ((#\[) (cond ((square-brackets?) (read-parenthesized #\])) ((curly-infix?) ;; The syntax of neoteric expressions requires that '[' be a ;; delimiter when curly-infix is enabled, so it cannot be part ;; of an unescaped symbol. We might as well do something ;; useful with it, so we adopt Kawa's convention: [...] => ;; ($bracket-list$ ...) ;; FIXME: source locations for this cons (cons '$bracket-list$ (read-parenthesized #\]))) (else (read-mixed-case-symbol ch)))) ((#\() (read-parenthesized #\))) ((#\") (read-string ch)) ((#\|) (if (r7rs-symbols?) (string->symbol (read-string ch)) (read-mixed-case-symbol ch))) ((#\') (list 'quote (read-subexpression "quoted expression"))) ((#\`) (list 'quasiquote (read-subexpression "quasiquoted expression"))) ((#\,) (cond ((eqv? #\@ (peek)) (next) (list 'unquote-splicing (read-subexpression "subexpression of ,@"))) (else (list 'unquote (read-subexpression "unquoted expression"))))) ((#\#) ;; FIXME: read-sharp should recur if we read a comment (read-sharp)) ((#\)) (error "unexpected \")\"")) ((#\}) (if (curly-infix?) (error "unexpected \"}\"") (read-mixed-case-symbol ch))) ((#\]) (if (square-brackets?) (error "unexpected \"]\"") (read-mixed-case-symbol ch))) ((#\:) (if (eq? (keyword-style) keyword-style-prefix) ;; FIXME: Don't skip whitespace here. (let ((sym (read-subexpression ":keyword"))) (symbol->keyword (strip-annotation sym))) (read-mixed-case-symbol ch))) ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\+ #\- #\.) (read-number ch)) (else (read-mixed-case-symbol ch)))) (define (read-neoteric ch) (let lp ((expr (read-expr* ch))) ;; 'expr' is the first component of the neoteric expression. If ;; the next character is '(', '[', or '{', (without any ;; intervening whitespace), we use it to construct a new ;; expression, and loop. For example: ;; f{n - 1}(x) => ((f (- n 1)) x). (case (peek) ((#\() ;; e(...) => (e ...) (next) (lp (cons expr (read-parenthesized #\))))) ((#\[) ;; e[...] => ($bracket-apply$ e ...) (next) (lp (cons* '$bracket-apply$ expr (read-parenthesized #\])))) ((#\{) ;; e{} => (e); e{...} => (e {...}) (next) (let ((args (read-parenthesized #\}))) (lp (if (null? args) (list expr) (list expr args))))) (else expr)))) (define (read-expr ch) (let ((line (port-line port)) (column (port-column port))) (annotate line column (if (zero? neoteric) (read-expr* ch) (read-neoteric ch))))) (define (read-directive) (define (directive-char? ch) (and (char? ch) (or (eqv? ch #\-) (char-alphabetic? ch) (char-numeric? ch)))) (let ((ch (peek))) (cond ((directive-char? ch) (next) (string->symbol (take-while ch directive-char?))) (else #f)))) (define (skip-scsh-comment) (let lp ((ch (next))) (cond ((eof-object? ch) (error "unterminated `#! ... !#' comment")) ((eqv? ch #\!) (let ((ch (next))) (if (eqv? ch #\#) (next) (lp ch)))) (else (lp (next)))))) (define (process-shebang) ;; After having read #!, we complete either with #!r6rs, ;; #!fold-case, #!no-fold-case, #!curly-infix, ;; #!curly-infix-and-bracket-lists, or a SCSH block comment ;; terminated by !#. (let ((sym (read-directive))) (cond ((eq? sym 'r6rs) (set-reader-option! bitfield:case-insensitive? 0) (set-reader-option! bitfield:r6rs-escapes? 1) (set-reader-option! bitfield:square-brackets? 1) (set-reader-option! bitfield:keyword-style keyword-style-hash-prefix) (set-reader-option! bitfield:hungry-eol-escapes? 1) (next)) ((eq? sym 'fold-case) (set-reader-option! bitfield:case-insensitive? 1) (next)) ((eq? sym 'no-fold-case) (set-reader-option! bitfield:case-insensitive? 0) (next)) ((eq? sym 'curly-infix) (set-reader-option! bitfield:curly-infix? 1) (next)) ((eq? sym 'curly-infix-and-bracket-lists) (set-reader-option! bitfield:curly-infix? 1) (set-reader-option! bitfield:square-brackets? 0) (next)) (else (skip-scsh-comment))))) (define (skip-eol-comment) (let ((ch (next))) (cond ((eof-object? ch) ch) ((eq? ch #\newline) (next)) (else (skip-eol-comment))))) ;; Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be ;; nested. (define (skip-r6rs-block-comment) ;; We have read #|, now looking for |#. (let ((ch (next))) (when (eof-object? ch) (error "unterminated `#| ... |#' comment")) (cond ((and (eqv? ch #\|) (eqv? (peek) #\#)) ;; Done. (next) (values)) ((and (eqv? ch #\#) (eqv? (peek) #\|)) ;; A nested comment. (next) (skip-r6rs-block-comment) (skip-r6rs-block-comment)) (else (skip-r6rs-block-comment))))) (define (read-subexpression what) (let ((ch (next-non-whitespace))) (when (eof-object? ch) (error (string-append "unexpected end of input while reading " what))) (read-expr ch))) (define (next-non-whitespace) (let lp ((ch (next))) (case ch ((#\;) (lp (skip-eol-comment))) ((#\#) (case (peek) ((#\!) (next) (lp (process-shebang))) ((#\;) (next) (read-subexpression "#; comment") (next-non-whitespace)) ((#\|) (if (read-hash-procedure #\|) ch (begin (next) (skip-r6rs-block-comment) (next-non-whitespace)))) (else ch))) ((#\space #\return #\ff #\newline #\tab) (next-non-whitespace)) (else ch)))) (let ((ch (next-non-whitespace))) (if (eof-object? ch) ch (read-expr ch)))) (define* (read #:optional (port (current-input-port))) (define filename (port-filename port)) (define annotate (if (memq 'positions (read-options)) (lambda (line column datum) (when (and (supports-source-properties? datum) ;; Line or column can be invalid via ;; set-port-column! or ungetting chars beyond start ;; of line. (<= 0 line) (<= 1 column)) ;; We always capture the column after one char of lookahead; ;; subtract off that lookahead value. (set-source-properties! datum `((filename . ,filename) (line . ,line) (column . ,(1- column))))) datum) (lambda (line column datum) datum))) (%read port annotate identity)) (define* (read-syntax #:optional (port (current-input-port))) (define filename (port-filename port)) (define (annotate line column datum) ;; Usually when reading compound expressions consisting of multiple ;; syntax objects, like lists, the "leaves" of the expression are ;; annotated but the "root" isn't. Like in (A . B), A and B will be ;; annotated but the pair won't. Therefore the usually correct ;; thing to do is to just annotate the result. However in the case ;; of reading ( . C), the result is the already annotated C, which ;; we don't want to re-annotate. Therefore we avoid re-annotating ;; already annotated objects. (if (syntax? datum) datum (datum->syntax #f ; No lexical context. datum #:source (vector filename line (1- column))))) (%read port annotate syntax->datum))