summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlan Mackenzie <acm@muc.de>2018-07-01 11:39:03 +0000
committerAlan Mackenzie <acm@muc.de>2018-07-01 11:39:03 +0000
commit4a9b24e1780c980d033b44f3c86133bbab691ebe (patch)
tree400c23dcd89d63c01c9f4a39c330efb90bf93951
parent76eda952b09db6d79342b7ddfcae45c7c836ab62 (diff)
downloademacs-scratch/fontify-open-string.tar.gz
Initial commit. Allow wanted fontification of open string in any mode.scratch/fontify-open-string
The wanted fontification is for the string face to end at the first unescaped newline. This is achieved by a new syntax flag `s' on NL, which means "terminate any open string". src/syntax.c (SYNTAX_FLAGS_CLOSE_STRING, back_maybe_string): New functions. (Fstring_to_syntax, Finternal_describe_syntax_value, scan_lists) (scan_sexps_forward): Adapt to handle the `s' flag. lisp/font-lock.el (font-lock-warn-open-string): New defcustom. (font-lock-fontify-syntactically-region): Enhance to fontify " with warning-face. lisp/progmodes/sh-script.el (sh-mode-syntax-table): Add flag `s' to syntax entry for \n.
-rw-r--r--lisp/font-lock.el24
-rw-r--r--lisp/progmodes/sh-script.el2
-rw-r--r--src/syntax.c193
3 files changed, 208 insertions, 11 deletions
diff --git a/lisp/font-lock.el b/lisp/font-lock.el
index be9fb4dc93f..f2b7fef5c23 100644
--- a/lisp/font-lock.el
+++ b/lisp/font-lock.el
@@ -287,6 +287,16 @@ If a number, only buffers greater than this size have fontification messages."
(integer :tag "size"))
:group 'font-lock
:version "24.1")
+
+(defcustom font-lock-warn-open-string t
+ "Fontify the opening quote of an unterminated string with warning face?
+This is done when this variable is non-nil.
+
+This works only when the syntax-table entry for newline contains the flag `s'
+\(see page \"xxx\" in the Elisp manual)."
+ :type 'boolean
+ :group 'font-lock
+ :version "27.1")
;; Originally these variable values were face names such as `bold' etc.
@@ -1597,18 +1607,30 @@ START should be at the beginning of a line."
(replace-regexp-in-string "^ *" "" comment-end))))
;; Find the `start' state.
(state (syntax-ppss start))
- face beg)
+ face beg in-string s-c-start)
(if loudly (message "Fontifying %s... (syntactically...)" (buffer-name)))
;;
;; Find each interesting place between here and `end'.
(while
(progn
(when (or (nth 3 state) (nth 4 state))
+ (setq s-c-start (nth 8 state))
+ (setq in-string (nth 3 state))
(setq face (funcall font-lock-syntactic-face-function state))
(setq beg (max (nth 8 state) start))
(setq state (parse-partial-sexp (point) end nil nil state
'syntax-table))
(when face (put-text-property beg (point) 'face face))
+;;;; NEW STOUGH, 2018-06-29
+ (put-text-property s-c-start (1+ s-c-start)
+ 'face
+ (if (and font-lock-warn-open-string
+ in-string
+ (not (nth 3 state))
+ (not (eq in-string (char-before))))
+ 'font-lock-warning-face
+ face))
+;;;; END OF NEW STOUGH
(when (and (eq face 'font-lock-comment-face)
(or font-lock-comment-start-skip
comment-start-skip))
diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el
index aaa86b5816f..bf760e0a6cc 100644
--- a/lisp/progmodes/sh-script.el
+++ b/lisp/progmodes/sh-script.el
@@ -429,7 +429,7 @@ name symbol."
(defvar sh-mode-syntax-table
(sh-mode-syntax-table ()
?\# "<"
- ?\n ">#"
+ ?\n ">#s"
?\" "\"\""
?\' "\"'"
?\` "\"`"
diff --git a/src/syntax.c b/src/syntax.c
index c5a4b03955b..b82b091ced2 100644
--- a/src/syntax.c
+++ b/src/syntax.c
@@ -33,7 +33,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
#define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
-/* Eight single-bit flags have the following meanings:
+/* Nine single-bit flags have the following meanings:
1. This character is the first of a two-character comment-start sequence.
2. This character is the second of a two-character comment-start sequence.
3. This character is the first of a two-character comment-end sequence.
@@ -42,6 +42,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
6. The char is part of a delimiter for comments of style "b".
7. This character is part of a nestable comment sequence.
8. The char is part of a delimiter for comments of style "c".
+ 9. The char will close an open string (except one opened by a string-fence).
Note that any two-character sequence whose first character has flag 1
and whose second character has flag 2 will be interpreted as a comment start.
@@ -108,7 +109,11 @@ SYNTAX_FLAGS_COMMENT_NESTED (int flags)
{
return (flags >> 22) & 1;
}
-
+static bool
+SYNTAX_FLAGS_CLOSE_STRING (int flags)
+{
+ return (flags >> 24) & 1;
+}
/* FLAGS should be the flags of the main char of the comment marker, e.g.
the second for comstart and the first for comend. */
static int
@@ -1206,6 +1211,10 @@ the value of a `syntax-table' text property. */)
case 'c':
val |= 1 << 23;
break;
+
+ case 's':
+ val |= 1 << 24;
+ break;
}
if (val < ASIZE (Vsyntax_code_object) && NILP (match))
@@ -1257,6 +1266,8 @@ c (on any of its chars) using this flag:
p means CHAR is a prefix character for `backward-prefix-chars';
such characters are treated as whitespace when they occur
between expressions.
+ s means CHAR will terminate any open string (except one started by a
+ character with generic string fence syntax).
usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
(Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
{
@@ -1294,7 +1305,8 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
(Lisp_Object syntax)
{
int code, syntax_code;
- bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
+ bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested,
+ strclose;
char str[2];
Lisp_Object first, match_lisp, value = syntax;
@@ -1335,6 +1347,7 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
+ strclose = SYNTAX_FLAGS_CLOSE_STRING (syntax_code);
if (Smax <= code)
{
@@ -1368,6 +1381,8 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
insert ("c", 1);
if (comnested)
insert ("n", 1);
+ if (strclose)
+ insert ("s", 1);
insert_string ("\twhich means: ");
@@ -1439,6 +1454,9 @@ DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
insert1 (Fsubstitute_command_keys (prefixdoc));
}
+ if (strclose)
+ insert_string (",\n\t will close any string started by a char with \" syntax");
+
return syntax;
}
@@ -2637,6 +2655,144 @@ syntax_multibyte (int c, bool multibyte_symbol_p)
return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
}
+static bool
+back_maybe_string (ptrdiff_t *from, ptrdiff_t *from_byte,
+ ptrdiff_t stop, bool multibyte_symbol_p)
+{
+ unsigned short int quit_count = 0;
+ enum syntaxcode code = Smax;
+ int syntax = Smax, prev_syntax;
+ ptrdiff_t at = *from, at_byte = *from_byte;
+ ptrdiff_t targ, targ_byte;
+ int c, stringterm;
+ ptrdiff_t defun_start;
+ ptrdiff_t defun_start_byte;
+
+#define DEC_AT \
+ do { \
+ rarely_quit (++quit_count); \
+ prev_syntax = syntax; \
+ DEC_BOTH (at, at_byte); \
+ if (at >= stop) \
+ UPDATE_SYNTAX_TABLE_BACKWARD (at); \
+ if (char_quoted (at, at_byte)) \
+ { \
+ DEC_BOTH (at, at_byte); \
+ syntax = code = Sword; \
+ } \
+ else \
+ { \
+ c = FETCH_CHAR_AS_MULTIBYTE (at_byte); \
+ syntax = SYNTAX_WITH_FLAGS (c); \
+ code = syntax_multibyte (c, multibyte_symbol_p); \
+ } \
+ if (SYNTAX_FLAGS_COMSTART_FIRST (syntax) \
+ && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)) \
+ code = Scomment; \
+ } while (0)
+
+ /* Find the alleged string opener. */
+ while ((at > stop)
+ && (code != Sstring)
+ && (!SYNTAX_FLAGS_CLOSE_STRING (syntax)))
+ {
+ DEC_AT;
+ }
+ if (code != Sstring)
+ goto lose;
+ stringterm = c;
+ targ = at;
+ targ_byte = at_byte;
+
+ /* Now go back over paired delimiters which are STRINGTERM. */
+ while (true) /* One quoted string per iteration. */
+ {
+ DEC_AT;
+ /* Search back for a terminating string delimiter: */
+ while ((at > stop)
+ && (code != Sstring)
+ && (code != Sstring_fence)
+ && (!SYNTAX_FLAGS_CLOSE_STRING (syntax)))
+ {
+ DEC_AT;
+ /* Check for comment and "other" strings. */
+ }
+ if ((at <= stop)
+ || SYNTAX_FLAGS_CLOSE_STRING (syntax))
+ goto done;
+ if (code == Sstring_fence)
+ stringterm = ST_STRING_STYLE;
+ else if (code == Sstring)
+ stringterm = c;
+ /* Now search back for the matching opening string delimiter: */
+ DEC_AT;
+ while ((at > stop)
+ && !((stringterm == ST_STRING_STYLE)
+ && (syntax == Sstring_fence))
+ && !((c == stringterm)
+ && (syntax == Sstring))
+ && (!SYNTAX_FLAGS_CLOSE_STRING (syntax)))
+ {
+ if ((syntax == Sstring_fence)
+ || (syntax == Sstring)
+ || (syntax == Scomment))
+ goto lossage;
+ DEC_AT;
+ }
+ if ((at <= stop)
+ || SYNTAX_FLAGS_CLOSE_STRING (syntax))
+ goto lose; /* Even number of string delims in line. */
+ }
+
+ done:
+ UPDATE_SYNTAX_TABLE_FORWARD (targ);
+ *from = targ;
+ *from_byte = targ_byte;
+ return true;
+ lose:
+ UPDATE_SYNTAX_TABLE_FORWARD (*from);
+ return false;
+
+ lossage:
+ /* We've encountered possible comments or strings with mixed
+ delimiters. Bail out and scan forward from a safe position. */
+ {
+ struct lisp_parse_state state;
+ bool adjusted = true;
+
+ defun_start = find_defun_start (*from, *from_byte);
+ defun_start_byte = find_start_value_byte;
+ adjusted = (defun_start > BEGV);
+ internalize_parse_state (Qnil, &state);
+ scan_sexps_forward (&state,
+ defun_start, defun_start_byte,
+ *from, TYPE_MINIMUM (EMACS_INT),
+ 0, 0);
+ if (!adjusted)
+ {
+ adjusted = true;
+ find_start_value
+ = CONSP (state.levelstarts) ? XINT (XCAR (state.levelstarts))
+ : state.thislevelstart >= 0 ? state.thislevelstart
+ : find_start_value;
+ find_start_value_byte = CHAR_TO_BYTE (find_start_value);
+ }
+
+ if ((state.instring != -1)
+ && (state.instring != ST_STRING_STYLE)
+ && (state.comstr_start >= stop))
+ {
+ UPDATE_SYNTAX_TABLE_BACKWARD (state.comstr_start);
+ *from = state.comstr_start;
+ *from_byte = CHAR_TO_BYTE (*from);
+ return true;
+ }
+ /* Syntax table is already valid at *FROM, after the
+ `scan_sexps_forward' */
+ return false;
+ }
+}
+
static Lisp_Object
scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
{
@@ -2803,13 +2959,16 @@ scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
while (1)
{
enum syntaxcode c_code;
+ int c_code_flags;
if (from >= stop)
goto lose;
UPDATE_SYNTAX_TABLE_FORWARD (from);
c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c_code = syntax_multibyte (c, multibyte_symbol_p);
+ c_code_flags = SYNTAX_WITH_FLAGS (c);
if (code == Sstring
- ? c == stringterm && c_code == Sstring
+ ? (c == stringterm && c_code == Sstring)
+ || SYNTAX_FLAGS_CLOSE_STRING (c_code_flags)
: c_code == Sstring_fence)
break;
@@ -2965,6 +3124,10 @@ scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
for very little gain, so we don't bother either. -sm */
if (found)
from = out_charpos, from_byte = out_bytepos;
+ else if (SYNTAX_FLAGS_CLOSE_STRING (syntax)
+ && back_maybe_string (&from, &from_byte, stop,
+ multibyte_symbol_p))
+ goto done2;
break;
case Scomment_fence:
@@ -3006,7 +3169,14 @@ scan_lists (EMACS_INT from, EMACS_INT count, EMACS_INT depth, bool sexpflag)
}
if (!depth && sexpflag) goto done2;
break;
- default:
+ case Swhitespace:
+ case Spunct:
+ if (SYNTAX_FLAGS_CLOSE_STRING (syntax)
+ && back_maybe_string (&from, &from_byte, stop,
+ multibyte_symbol_p))
+ goto done2;
+ break;
+ default:
/* Ignore whitespace, punctuation, quote, endcomment. */
break;
}
@@ -3046,7 +3216,7 @@ function scans over parentheses until the depth goes to zero COUNT
times. Hence, positive DEPTH moves out that number of levels of
parentheses, while negative DEPTH moves to a deeper level.
-Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
+Comments are skipped over if `parse-sexp-ignore-comments' is non-nil.
If we reach the beginning or end of the accessible part of the buffer
before we have scanned over COUNT lists, return nil if the depth at
@@ -3065,7 +3235,7 @@ DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
If COUNT is negative, scan backwards.
Returns the character number of the position thus found.
-Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
+Comments are skipped over if `parse-sexp-ignore-comments' is non-nil.
If the beginning or end of (the accessible part of) the buffer is reached
in the middle of a parenthetical grouping, an error is signaled.
@@ -3396,10 +3566,12 @@ do { prev_from = from; \
{
int c;
enum syntaxcode c_code;
+ int c_code_flags;
if (from >= end) goto done;
c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
c_code = SYNTAX (c);
+ c_code_flags = SYNTAX_WITH_FLAGS (c);
/* Check C_CODE here so that if the char has
a syntax-table property which says it is NOT
@@ -3421,9 +3593,12 @@ do { prev_from = from; \
break;
default:
- break;
+ if (nofence
+ && SYNTAX_FLAGS_CLOSE_STRING (c_code_flags))
+ goto string_end;
+ break;
}
- INC_FROM;
+ INC_FROM;
rarely_quit (++quit_count);
}
}