summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAkim Demaille <akim.demaille@gmail.com>2020-03-07 12:59:09 +0100
committerAkim Demaille <akim.demaille@gmail.com>2020-03-08 08:10:11 +0100
commitb6386034773829b10c44ff93ce0a492980684c64 (patch)
treee2c868c81bba7d4bcf21ce2d06a34a5da1896e9d
parente21ff47f5d0b64da693a47b7dd200a1a44a5bbeb (diff)
downloadbison-b6386034773829b10c44ff93ce0a492980684c64.tar.gz
diagnostics: beware of zero-width characters
Currently we rely on (visual) width of the characters to decide where to open and close the styling of the quoted lines. This breaks when we deal with zero-width characters: we cannot just rely on (visual) columns, we need to know whether we are before, inside, or after the highlighted portion. * src/location.c (location_caret): col_end: no longer add 1, "regular" characters have a width of 1, only 0-width characters have 0-width. opened: replace with 'state', a three-valued enum. Don't reopen the style if we already did. * tests/diagnostics.at (Zero-width characters): New.
-rw-r--r--src/location.c32
-rw-r--r--src/location.h10
-rw-r--r--tests/diagnostics.at26
3 files changed, 45 insertions, 23 deletions
diff --git a/src/location.c b/src/location.c
index dbcd67ec..9f929c00 100644
--- a/src/location.c
+++ b/src/location.c
@@ -421,12 +421,14 @@ location_caret (location loc, const char *style, FILE *out)
{
/* The last column to highlight. Only the first line of
multiline locations are quoted, in which case the ending
- column is the end of line. Single point locations (with
- equal boundaries) denote the character that they
- follow. */
- int col_end
+ column is the end of line.
+
+ We used to work with byte offsets, and that was much
+ easier. However, we went back to using (visual) columns to
+ support truncating of long lines. */
+ const int col_end
= loc.start.line == loc.end.line
- ? loc.end.column + (loc.start.column == loc.end.column)
+ ? loc.end.column
: caret_info.line_len;
/* Quote the file (at most the first line in the case of
multiline locations). */
@@ -436,24 +438,28 @@ location_caret (location loc, const char *style, FILE *out)
expected (maybe the file was changed since the scanner
ran), we might reach the end before we actually saw the
opening column. */
- bool opened = false;
+ enum { before, inside, after } state = before;
while (!mb_iseof (c) && !mb_iseq (c, '\n'))
{
- if (caret_info.pos.column == loc.start.column)
+ // We might have already opened (and even closed!) the
+ // style and yet have the equality of the columns if we
+ // just saw zero-width characters.
+ if (state == before
+ && caret_info.pos.column == loc.start.column)
{
begin_use_class (style, out);
- opened = true;
+ state = inside;
}
if (skip < caret_info.pos.column)
mb_putc (c, out);
boundary_compute (&caret_info.pos, mb_ptr (c), mb_len (c));
caret_getc (c);
- if (opened
+ if (state == inside
&& (caret_info.pos.column == col_end
|| width < caret_info.pos.column - skip))
{
end_use_class (style, out);
- opened = false;
+ state = after;
}
if (width < caret_info.pos.column - skip)
{
@@ -461,11 +467,11 @@ location_caret (location loc, const char *style, FILE *out)
break;
}
}
- // The line is shorter than expected.
- if (opened)
+ if (state == inside)
{
+ // The line is shorter than expected.
end_use_class (style, out);
- opened = false;
+ state = after;
}
putc ('\n', out);
}
diff --git a/src/location.h b/src/location.h
index ccb42e3c..cb3025c6 100644
--- a/src/location.h
+++ b/src/location.h
@@ -42,16 +42,14 @@ typedef struct
/* If positive, the column (starting at 1) just after the boundary.
This is neither a byte count, nor a character count; it is a
- column count. If this is INT_MAX, the column number has
+ (visual) column count. If this is INT_MAX, the column number has
overflowed.
- Meaningless and not displayed if nonpositive.
- */
+ Meaningless and not displayed if nonpositive. */
int column;
- /* If nonnegative, the byte number (starting at 0) in the current line.
- Never displayed, used when printing error messages with colors to
- know where colors start and end. */
+ /* If nonnegative, the byte number (starting at 0) in the current
+ line. Not displayed (unless --trace=location). */
int byte;
} boundary;
diff --git a/tests/diagnostics.at b/tests/diagnostics.at
index cbf56b77..1471934f 100644
--- a/tests/diagnostics.at
+++ b/tests/diagnostics.at
@@ -37,15 +37,15 @@ AT_BISON_OPTION_PUSHDEFS
AT_DATA_GRAMMAR([[input.y]], [$2])
+AT_DATA([experr], [$4])
+
# For some reason, literal ^M in the input are removed and don't end
# in `input.y`. So use the two-character ^M represent it, and let
# Perl insert real CR characters.
-if grep '\^M' input.y >/dev/null; then
- AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}gx' input.y])
+if $EGREP ['\^M|\\[0-9][0-9][0-9]'] input.y experr >/dev/null; then
+ AT_PERL_REQUIRE([-pi -e 's{\^M}{\r}g;s{\\(\d{3}|.)}{$v = $[]1; $v =~ /\A\d+\z/ ? chr($v) : $v}ge' input.y experr])
fi
-AT_DATA([experr], [$4])
-
AT_CHECK([LC_ALL="$locale" $5 bison -fcaret --color=debug -Wall input.y], [$3], [], [experr])
# When no style, same messages, but without style.
@@ -193,6 +193,24 @@ input.y:12.8-10: previous declaration
]])
+## ----------------------- ##
+## Zero-width characters. ##
+## ----------------------- ##
+
+# We used to open twice the styling for characters that have a
+# zero-width on display (e.g., \005).
+
+AT_TEST([[Zero-width characters]],
+[[%%
+exp: an\005error.
+]],
+[1],
+[[input.y:10.8: <error>error:</error> invalid character: '\\005'
+ 10 | exp: an<error>\005</error>error.
+ | <error>^</error>
+]])
+
+
## -------------------------------------- ##
## Tabulations and multibyte characters. ##
## -------------------------------------- ##