summaryrefslogtreecommitdiff
path: root/libcpp/charset.c
diff options
context:
space:
mode:
author Lewis Hyatt <lhyatt@gmail.com> 2020-07-14 12:05:56 -0400
committer Lewis Hyatt <lhyatt@gmail.com> 2020-07-14 12:05:56 -0400
commit 004bb936d6d5f177af26ad4905595e843d5665a5 (patch)
tree fe3ee343abc249a14509943f4e731f8fb73f6206 /libcpp/charset.c
parent 5f809982e8e05c39029431363e08759d6c750f0e (diff)
download gcc-004bb936d6d5f177af26ad4905595e843d5665a5.tar.gz
diagnostics: Support conversion of tabs to spaces [PR49973] [PR86904]
Supports conversion of tabs to spaces when outputting diagnostics. Also adds -fdiagnostics-column-unit and -fdiagnostics-column-origin options to control how the column number is output, thereby resolving the two PRs. gcc/c-family/ChangeLog: PR other/86904 * c-indentation.c (should_warn_for_misleading_indentation): Get global tabstop from the new source. * c-opts.c (c_common_handle_option): Remove handling of -ftabstop, which is now a common option. * c.opt: Likewise. gcc/ChangeLog: PR preprocessor/49973 PR other/86904 * common.opt: Handle -ftabstop here instead of in c-family options. Add -fdiagnostics-column-unit= and -fdiagnostics-column-origin= options. * opts.c (common_handle_option): Handle the new options. * diagnostic-format-json.cc (json_from_expanded_location): Add diagnostic_context argument. Use it to convert column numbers as per the new options. (json_from_location_range): Likewise. (json_from_fixit_hint): Likewise. (json_end_diagnostic): Pass the new context argument to helper functions above. Add "column-origin" field to the output. (test_unknown_location): Add the new context argument to calls to helper functions. (test_bad_endpoints): Likewise. * diagnostic-show-locus.c (exploc_with_display_col::exploc_with_display_col): Support tabstop parameter. (layout_point::layout_point): Make use of class exploc_with_display_col. (layout_range::layout_range): Likewise. (struct line_bounds): Clarify that the units are now always display columns. Rename members accordingly. Add constructor. (layout::print_source_line): Add support for tab expansion. (make_range): Adapt to class layout_range changes. (layout::maybe_add_location_range): Likewise. (layout::layout): Adapt to class exploc_with_display_col changes. (layout::calculate_x_offset_display): Support tabstop parameter. (layout::print_annotation_line): Adapt to struct line_bounds changes. (layout::print_line): Likewise. (line_label::line_label): Add diagnostic_context argument. 
(get_affected_range): Likewise. (get_printed_columns): Likewise. (layout::print_any_labels): Adapt to struct line_label changes. (class correction): Add m_tabstop member. (correction::correction): Add tabstop argument. (correction::compute_display_cols): Use m_tabstop. (class line_corrections): Add m_context member. (line_corrections::line_corrections): Add diagnostic_context argument. (line_corrections::add_hint): Use m_context to handle tabstops. (layout::print_trailing_fixits): Adapt to class line_corrections changes. (test_layout_x_offset_display_utf8): Support tabstop parameter. (test_layout_x_offset_display_tab): New selftest. (test_one_liner_colorized_utf8): Likewise. (test_tab_expansion): Likewise. (test_diagnostic_show_locus_one_liner_utf8): Call the new tests. (diagnostic_show_locus_c_tests): Likewise. (test_overlapped_fixit_printing): Adapt to helper class and function changes. (test_overlapped_fixit_printing_utf8): Likewise. (test_overlapped_fixit_printing_2): Likewise. * diagnostic.h (enum diagnostics_column_unit): New enum. (struct diagnostic_context): Add members for the new options. (diagnostic_converted_column): Declare. (json_from_expanded_location): Add new context argument. * diagnostic.c (diagnostic_initialize): Initialize new members. (diagnostic_converted_column): New function. (maybe_line_and_column): Be willing to output a column of 0. (diagnostic_get_location_text): Convert column number as per the new options. (diagnostic_report_current_module): Likewise. (assert_location_text): Add origin and column_unit arguments for testing the new functionality. (test_diagnostic_get_location_text): Test the new functionality. * doc/invoke.texi: Document the new options and behavior. * input.h (location_compute_display_column): Add tabstop argument. * input.c (location_compute_display_column): Likewise. (test_cpp_utf8): Add selftests for tab expansion. 
* tree-diagnostic-path.cc (default_tree_make_json_for_path): Pass the new context argument to json_from_expanded_location(). libcpp/ChangeLog: PR preprocessor/49973 PR other/86904 * include/cpplib.h (struct cpp_options): Removed support for -ftabstop, which is now handled by diagnostic_context. (class cpp_display_width_computation): New class. (cpp_byte_column_to_display_column): Add optional tabstop argument. (cpp_display_width): Likewise. (cpp_display_column_to_byte_column): Likewise. * charset.c (cpp_display_width_computation::cpp_display_width_computation): New function. (cpp_display_width_computation::advance_display_cols): Likewise. (compute_next_display_width): Removed and implemented this functionality in a new function... (cpp_display_width_computation::process_next_codepoint): ...here. (cpp_byte_column_to_display_column): Added tabstop argument. Reimplemented in terms of class cpp_display_width_computation. (cpp_display_column_to_byte_column): Likewise. * init.c (cpp_create_reader): Remove handling of -ftabstop, which is now handled by diagnostic_context. gcc/testsuite/ChangeLog: PR preprocessor/49973 PR other/86904 * c-c++-common/Wmisleading-indentation-3.c: Adjust expected output for new defaults. * c-c++-common/Wmisleading-indentation.c: Likewise. * c-c++-common/diagnostic-format-json-1.c: Likewise. * c-c++-common/diagnostic-format-json-2.c: Likewise. * c-c++-common/diagnostic-format-json-3.c: Likewise. * c-c++-common/diagnostic-format-json-4.c: Likewise. * c-c++-common/diagnostic-format-json-5.c: Likewise. * c-c++-common/missing-close-symbol.c: Likewise. * g++.dg/diagnostic/bad-binary-ops.C: Likewise. * g++.dg/parse/error4.C: Likewise. * g++.old-deja/g++.brendan/crash11.C: Likewise. * g++.old-deja/g++.pt/overload2.C: Likewise. * g++.old-deja/g++.robertl/eb109.C: Likewise. * gcc.dg/analyzer/malloc-paths-9.c: Likewise. * gcc.dg/bad-binary-ops.c: Likewise. * gcc.dg/format/branch-1.c: Likewise. * gcc.dg/format/pr79210.c: Likewise. 
* gcc.dg/plugin/diagnostic-test-expressions-1.c: Likewise. * gcc.dg/plugin/diagnostic-test-string-literals-1.c: Likewise. * gcc.dg/redecl-4.c: Likewise. * gfortran.dg/diagnostic-format-json-1.F90: Likewise. * gfortran.dg/diagnostic-format-json-2.F90: Likewise. * gfortran.dg/diagnostic-format-json-3.F90: Likewise. * go.dg/arrayclear.go: Add a comment explaining why adding a comment was necessary to work around a dejagnu bug. * c-c++-common/diagnostic-units-1.c: New test. * c-c++-common/diagnostic-units-2.c: New test. * c-c++-common/diagnostic-units-3.c: New test. * c-c++-common/diagnostic-units-4.c: New test. * c-c++-common/diagnostic-units-5.c: New test. * c-c++-common/diagnostic-units-6.c: New test. * c-c++-common/diagnostic-units-7.c: New test. * c-c++-common/diagnostic-units-8.c: New test.
Diffstat (limited to 'libcpp/charset.c')
-rw-r--r-- libcpp/charset.c 98
1 files changed, 68 insertions, 30 deletions
diff --git a/libcpp/charset.c b/libcpp/charset.c
index db47235b847..28b81c9c864 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -2276,49 +2276,90 @@ cpp_string_location_reader::get_next ()
return result;
}
-/* Helper for cpp_byte_column_to_display_column and its inverse. Given a
- pointer to a UTF-8-encoded character, compute its display width. *INBUFP
- points on entry to the start of the UTF-8 encoding of the character, and
- is updated to point just after the last byte of the encoding. *INBYTESLEFTP
- contains on entry the remaining size of the buffer into which *INBUFP
- points, and this is also updated accordingly. If *INBUFP does not
+cpp_display_width_computation::
+cpp_display_width_computation (const char *data, int data_length, int tabstop) :
+ m_begin (data),
+ m_next (m_begin),
+ m_bytes_left (data_length),
+ m_tabstop (tabstop),
+ m_display_cols (0)
+{
+ gcc_assert (m_tabstop > 0);
+}
+
+
+/* The main implementation function for class cpp_display_width_computation.
+ m_next points on entry to the start of the UTF-8 encoding of the next
+ character, and is updated to point just after the last byte of the encoding.
+ m_bytes_left contains on entry the remaining size of the buffer into which
+ m_next points, and this is also updated accordingly. If m_next does not
point to a valid UTF-8-encoded sequence, then it will be treated as a single
- byte with display width 1. */
+ byte with display width 1. m_display_cols is the current display column,
+ relative to which tab stops should be expanded. Returns the display width of
+ the codepoint just processed. */
-static inline int
-compute_next_display_width (const uchar **inbufp, size_t *inbytesleftp)
+int
+cpp_display_width_computation::process_next_codepoint ()
{
cppchar_t c;
- if (one_utf8_to_cppchar (inbufp, inbytesleftp, &c) != 0)
+ int next_width;
+
+ if (*m_next == '\t')
+ {
+ ++m_next;
+ --m_bytes_left;
+ next_width = m_tabstop - (m_display_cols % m_tabstop);
+ }
+ else if (one_utf8_to_cppchar ((const uchar **) &m_next, &m_bytes_left, &c)
+ != 0)
{
/* Input is not convertible to UTF-8. This could be fine, e.g. in a
string literal, so don't complain. Just treat it as if it has a width
of one. */
- ++*inbufp;
- --*inbytesleftp;
- return 1;
+ ++m_next;
+ --m_bytes_left;
+ next_width = 1;
+ }
+ else
+ {
+ /* one_utf8_to_cppchar() has updated m_next and m_bytes_left for us. */
+ next_width = cpp_wcwidth (c);
}
- /* one_utf8_to_cppchar() has updated inbufp and inbytesleftp for us. */
- return cpp_wcwidth (c);
+ m_display_cols += next_width;
+ return next_width;
+}
+
+/* Utility to advance the byte stream by the minimum amount needed to consume
+ N display columns. Returns the number of display columns that were
+ actually skipped. This could be less than N, if there was not enough data,
+ or more than N, if the last character to be skipped had a sufficiently large
+ display width. */
+int
+cpp_display_width_computation::advance_display_cols (int n)
+{
+ const int start = m_display_cols;
+ const int target = start + n;
+ while (m_display_cols < target && !done ())
+ process_next_codepoint ();
+ return m_display_cols - start;
}
/* For the string of length DATA_LENGTH bytes that begins at DATA, compute
how many display columns are occupied by the first COLUMN bytes. COLUMN
may exceed DATA_LENGTH, in which case the phantom bytes at the end are
- treated as if they have display width 1. */
+ treated as if they have display width 1. Tabs are expanded to the next tab
+ stop, relative to the start of DATA. */
int
cpp_byte_column_to_display_column (const char *data, int data_length,
- int column)
+ int column, int tabstop)
{
- int display_col = 0;
- const uchar *udata = (const uchar *) data;
const int offset = MAX (0, column - data_length);
- size_t inbytesleft = column - offset;
- while (inbytesleft)
- display_col += compute_next_display_width (&udata, &inbytesleft);
- return display_col + offset;
+ cpp_display_width_computation dw (data, column - offset, tabstop);
+ while (!dw.done ())
+ dw.process_next_codepoint ();
+ return dw.display_cols_processed () + offset;
}
/* For the string of length DATA_LENGTH bytes that begins at DATA, compute
@@ -2328,14 +2369,11 @@ cpp_byte_column_to_display_column (const char *data, int data_length,
int
cpp_display_column_to_byte_column (const char *data, int data_length,
- int display_col)
+ int display_col, int tabstop)
{
- int column = 0;
- const uchar *udata = (const uchar *) data;
- size_t inbytesleft = data_length;
- while (column < display_col && inbytesleft)
- column += compute_next_display_width (&udata, &inbytesleft);
- return data_length - inbytesleft + MAX (0, display_col - column);
+ cpp_display_width_computation dw (data, data_length, tabstop);
+ const int avail_display = dw.advance_display_cols (display_col);
+ return dw.bytes_processed () + MAX (0, display_col - avail_display);
}
/* Our own version of wcwidth(). We don't use the actual wcwidth() in glibc,