summaryrefslogtreecommitdiff
path: root/libcpp/include
diff options
context:
space:
mode:
authordmalcolm <dmalcolm@138bc75d-0d04-0410-961f-82ee72b054a4>2015-11-13 16:29:59 +0000
committerdmalcolm <dmalcolm@138bc75d-0d04-0410-961f-82ee72b054a4>2015-11-13 16:29:59 +0000
commita96cefb2307c770164541705effdc2cc00bbaeed (patch)
treed1ab10c369a8f1ec9f67e1e8cb70acef36897f21 /libcpp/include
parent2cabe7927004ea182f411a587f5bef2279ddc40e (diff)
downloadgcc-a96cefb2307c770164541705effdc2cc00bbaeed.tar.gz
Source range tracking in libcpp and C FE, with bit-packing optimization
This patch combines: [PATCH 05/10] Add ranges to libcpp tokens (via ad-hoc data, unoptimized) [PATCH 06/10] Track expression ranges in C frontend [PATCH 07/10] Add plugin to recursively dump the source-ranges in a tree (v2) [PATCH 08/10] Wire things up so that libcpp users get token underlines [PATCH 09/10] Delay some resolution of ad-hoc locations, preserving ranges [PATCH 10/10] Compress short ranges into source_location [PATCH] libcpp: add examples to source_location description along with fixes for the nits identified during review. gcc/ChangeLog: * Makefile.in (OBJS): Add gcc-rich-location.o. * diagnostic.c (diagnostic_append_note): Pass line_table to rich_location ctor. (emit_diagnostic): Likewise. (inform): Likewise. (inform_n): Likewise. (warning): Likewise. (warning_at): Likewise. (warning_n): Likewise. (pedwarn): Likewise. (permerror): Likewise. (error): Likewise. (error_n): Likewise. (error_at): Likewise. (sorry): Likewise. (fatal_error): Likewise. (internal_error): Likewise. (internal_error_no_backtrace): Likewise. (source_range::debug): Likewise. * gcc-rich-location.c: New file. * gcc-rich-location.h: New file. * genmatch.c (fatal_at): Pass line_table to rich_location ctor. (warning_at): Likewise. * gimple.h (gimple_set_block): Use set_block function. * input.c (dump_line_table_statistics): Dump stats on how many ranges were optimized vs how many needed ad-hoc table. (write_digit_row): Add "map" param; use its range_bits to calculate the per-character offset. (dump_location_info): Print the range and column bits for each ordinary map. Use the range bits to calculate the per-character offset. Pass the map as a new param to the various calls to write_digit_row. Eliminate uses of ORDINARY_MAP_NUMBER_OF_COLUMN_BITS. * print-tree.c (print_node): Print any source range information. * rtl-error.c (diagnostic_for_asm): Likewise. * toplev.c (general_init): Initialize line_table's default_range_bits. * tree-cfg.c (move_block_to_fn): Likewise. 
(move_block_to_fn): Likewise. * tree-inline.c (copy_phis_for_bb): Likewise. * tree.c (tree_set_block): Likewise. (get_pure_location): New function. (set_source_range): New functions. (set_block): New function. (set_source_range): New functions. * tree.h (CAN_HAVE_RANGE_P): New. (EXPR_LOCATION_RANGE): New. (EXPR_HAS_RANGE): New. (get_expr_source_range): New inline function. (DECL_LOCATION_RANGE): New. (set_source_range): New decls. (get_decl_source_range): New inline function. gcc/ada/ChangeLog: * gcc-interface/trans.c (Sloc_to_locus): Add line_table param when calling linemap_position_for_line_and_column. gcc/c-family/ChangeLog: * c-common.c (c_fully_fold_internal): Capture existing source_range, and store it on the result. * c-opts.c (c_common_init_options): Set global_dc->colorize_source_p. gcc/c/ChangeLog: * c-decl.c (warn_defaults_to): Pass line_table to rich_location ctor. * c-errors.c (pedwarn_c99): Likewise. (pedwarn_c90): Likewise. * c-parser.c (set_c_expr_source_range): New functions. (c_token::get_range): New method. (c_token::get_finish): New method. (c_parser_expr_no_commas): Call set_c_expr_source_range on the ret based on the range from the start of the LHS to the end of the RHS. (c_parser_conditional_expression): Likewise, based on the range from the start of the cond.value to the end of exp2.value. (c_parser_binary_expression): Call set_c_expr_source_range on the stack values for TRUTH_ANDIF_EXPR and TRUTH_ORIF_EXPR. (c_parser_cast_expression): Call set_c_expr_source_range on ret based on the cast_loc through to the end of the expr. (c_parser_unary_expression): Likewise, based on the op_loc through to the end of op. (c_parser_sizeof_expression): Likewise, based on the start of the sizeof token through to either the closing paren or the end of expr. (c_parser_postfix_expression): Likewise, using the token range, or from the open paren through to the close paren for parenthesized expressions. 
(c_parser_postfix_expression_after_primary): Likewise, for various kinds of expression. * c-tree.h (struct c_expr): Add field "src_range". (c_expr::get_start): New method. (c_expr::get_finish): New method. (set_c_expr_source_range): New decls. * c-typeck.c (parser_build_unary_op): Call set_c_expr_source_range on ret for prefix unary ops. (parser_build_binary_op): Likewise, running from the start of arg1.value through to the end of arg2.value. gcc/cp/ChangeLog: * error.c (pedwarn_cxx98): Pass line_table to rich_location ctor. gcc/fortran/ChangeLog: * error.c (gfc_warning): Pass line_table to rich_location ctor. (gfc_warning_now_at): Likewise. (gfc_warning_now): Likewise. (gfc_error_now): Likewise. (gfc_fatal_error): Likewise. (gfc_error): Likewise. (gfc_internal_error): Likewise. gcc/testsuite/ChangeLog: * gcc.dg/diagnostic-token-ranges.c: New file. * gcc.dg/diagnostic-tree-expr-ranges-2.c: New file. * gcc.dg/plugin/diagnostic-test-expressions-1.c: New file. * gcc.dg/plugin/diagnostic-test-show-trees-1.c: New file. * gcc.dg/plugin/diagnostic_plugin_show_trees.c: New file. * gcc.dg/plugin/diagnostic_plugin_test_show_locus.c (get_loc): Add line_table param when calling linemap_position_for_line_and_column. (test_show_locus): Pass line_table to rich_location ctors. (plugin_init): Remove setting of global_dc->colorize_source_p. * gcc.dg/plugin/diagnostic_plugin_test_tree_expression_range.c: New file. * gcc.dg/plugin/plugin.exp (plugin_test_list): Add diagnostic_plugin_test_tree_expression_range.c, diagnostic-test-expressions-1.c, diagnostic_plugin_show_trees.c, and diagnostic-test-show-trees-1.c. libcpp/ChangeLog: * errors.c (cpp_diagnostic): Pass pfile->line_table to rich_location ctor. (cpp_diagnostic_with_line): Likewise. * include/cpplib.h (struct cpp_token): Update comment for src_loc to indicate that the range of the token is "baked into" the source_location. 
* include/line-map.h (source_location): Update the descriptive comment to reflect the packing scheme for short ranges, adding worked examples of location encoding. (struct line_map_ordinary): Drop field "column_bits" in favor of field "m_column_and_range_bits"; add field "m_range_bits". (ORDINARY_MAP_NUMBER_OF_COLUMN_BITS): Delete. (location_adhoc_data): Add source_range field. (struct line_maps): Add fields "default_range_bits", "num_optimized_ranges" and "num_unoptimized_ranges". (get_combined_adhoc_loc): Add source_range param. (get_range_from_loc): New declaration. (pure_location_p): New prototype. (COMBINE_LOCATION_DATA): Add source_range param. (SOURCE_LINE): Update for renaming of column_bits. (SOURCE_COLUMN): Likewise. Shift the column right by the map's range_bits. (LAST_SOURCE_LINE_LOCATION): Update for renaming of column_bits. (linemap_position_for_line_and_column): Add line_maps * params. (rich_location::rich_location): Likewise. * lex.c (_cpp_lex_direct): Capture the range of the token, baking it into token->src_loc via a call to COMBINE_LOCATION_DATA. * line-map.c (LINE_MAP_MAX_COLUMN_NUMBER): Reduce from 1U << 17 to 1U << 12. (location_adhoc_data_hash): Add the src_range into the hash value. (location_adhoc_data_eq): Require equality of the src_range values. (can_be_stored_compactly_p): New function. (get_combined_adhoc_loc): Add src_range param, and store it, via a bit-packing scheme for short ranges, otherwise within the lookaside table. Remove the requirement that data is non-NULL. (get_range_from_adhoc_loc): New function. (get_range_from_loc): New function. (pure_location_p): New function. (linemap_add): Ensure that start_location has zero for the range_bits, unless we're past LINE_MAP_MAX_LOCATION_WITH_COLS. Initialize range_bits to zero. Assert that the start_location is "pure". (linemap_line_start): Assert that the column_and_range_bits >= range_bits. 
Update determination of whether we need to start a new map using the effective column bits, without the range bits. Use the set's default_range_bits in new maps, apart from those with column_bits == 0, which should also have 0 range_bits. Increase the column bits for new maps by the range bits. When adding lines to an existing map, use set->highest_line directly rather than offsetting highest by SOURCE_COLUMN. Add assertions to sanity-check the return value. (linemap_position_for_column): Offset to_column by range_bits. Update set->highest_location if necessary. (linemap_position_for_line_and_column): Add line_maps * param. Update the calculation to offset the column by range_bits, and conditionalize it on being <= LINE_MAP_MAX_LOCATION_WITH_COLS. Bound it by LINEMAPS_MACRO_LOWEST_LOCATION. Update set->highest_location if necessary. (linemap_position_for_loc_and_offset): Handle ad-hoc locations; pass "set" to linemap_position_for_line_and_column. (linemap_macro_map_loc_unwind_toward_spelling): Add line_maps param. Handle ad-hoc locations. (linemap_location_in_system_header_p): Pass on "set" to call to linemap_macro_map_loc_unwind_toward_spelling. (linemap_macro_loc_to_spelling_point): Retain ad-hoc locations. Pass on "set" to call to linemap_macro_map_loc_unwind_toward_spelling. (linemap_resolve_location): Retain ad-hoc locations. Pass on "set" to call to linemap_macro_map_loc_unwind_toward_spelling. (linemap_unwind_toward_expansion): Pass on "set" to call to linemap_macro_map_loc_unwind_toward_spelling. (linemap_expand_location): Extract the data pointer before extracting the location. (rich_location::rich_location): Add line_maps param; use it to extract the range from the source_location. * location-example.txt: Regenerate, showing new representation. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@230331 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libcpp/include')
-rw-r--r--libcpp/include/cpplib.h3
-rw-r--r--libcpp/include/line-map.h219
2 files changed, 190 insertions, 32 deletions
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index a2bdfa0c808..f5c2a21e952 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -237,7 +237,8 @@ struct GTY(()) cpp_identifier {
/* A preprocessing token. This has been carefully packed and should
occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts. */
struct GTY(()) cpp_token {
- source_location src_loc; /* Location of first char of token. */
+ source_location src_loc; /* Location of first char of token,
+ together with range of full token. */
ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT; /* token type */
unsigned short flags; /* flags - see above */
diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h
index c9340a6eaef..e7608f1e468 100644
--- a/libcpp/include/line-map.h
+++ b/libcpp/include/line-map.h
@@ -47,7 +47,8 @@ enum lc_reason
typedef unsigned int linenum_type;
/* The typedef "source_location" is a key within the location database,
- identifying a source location or macro expansion.
+ identifying a source location or macro expansion, along with range
+ information, and (optionally) a pointer for use by gcc.
This key only has meaning in relation to a line_maps instance. Within
gcc there is a single line_maps instance: "line_table", declared in
@@ -69,13 +70,48 @@ typedef unsigned int linenum_type;
| ordmap[0]->start_location) | first line in ordmap 0
-----------+-------------------------------+-------------------------------
| ordmap[1]->start_location | First line in ordmap 1
- | ordmap[1]->start_location+1 | First column in that line
- | ordmap[1]->start_location+2 | 2nd column in that line
- | | Subsequent lines are offset by
- | | (1 << column_bits),
- | | e.g. 128 for 7 bits, with a
- | | column value of 0 representing
- | | "the whole line".
+ | ordmap[1]->start_location+32 | First column in that line
+ | (assuming range_bits == 5) |
+ | ordmap[1]->start_location+64 | 2nd column in that line
+ | ordmap[1]->start_location+4096| Second line in ordmap 1
+ | (assuming column_bits == 12)
+ |
+ | Subsequent lines are offset by (1 << column_bits),
+ | e.g. 4096 for 12 bits, with a column value of 0 representing
+ | "the whole line".
+ |
+ | Within a line, the low "range_bits" (typically 5) are used for
+ | storing short ranges, so that there's an offset of
+ | (1 << range_bits) between individual columns within a line,
+ | typically 32.
+ | The low range_bits store the offset of the end point from the
+ | start point, and the start point is found by masking away
+ | the range bits.
+ |
+ | For example:
+ | ordmap[1]->start_location+64 "2nd column in that line"
+ | above means a caret at that location, with a range
+ | starting and finishing at the same place (the range bits
+ | are 0), a range of length 1.
+ |
+ | By contrast:
+ | ordmap[1]->start_location+68
+ | has range bits 0x4, meaning a caret with a range starting at
+ | that location, but with endpoint 4 columns further on: a range
+ | of length 5.
+ |
+ | Ranges that have caret != start, or have an endpoint too
+ | far away to fit in range_bits are instead stored as ad-hoc
+ | locations. Hence for range_bits == 5 we can compactly store
+ | tokens of length <= 32 without needing to use the ad-hoc
+ | table.
+ |
+ | This packing scheme means we effectively have
+ | (column_bits - range_bits)
+ | of bits for the columns, typically (12 - 5) = 7, for 128
+ | columns; longer line widths are accommodated by starting a
+ | new ordmap with a higher column_bits.
+ |
| ordmap[2]->start_location-1 | Final location in ordmap 1
-----------+-------------------------------+-------------------------------
| ordmap[2]->start_location | First line in ordmap 2
@@ -127,8 +163,101 @@ typedef unsigned int linenum_type;
0xffffffff | UINT_MAX |
-----------+-------------------------------+-------------------------------
- To see how this works in practice, see the worked example in
- libcpp/location-example.txt. */
+ Examples of location encoding.
+
+ Packed ranges
+ =============
+
+ Consider encoding the location of a token "foo", seen underlined here
+ on line 523, within an ordinary line_map that starts at line 500:
+
+ 11111111112
+ 12345678901234567890
+ 522
+ 523 return foo + bar;
+ ^~~
+ 524
+
+ The location's caret and start are both at line 523, column 11; the
+ location's finish is on the same line, at column 13 (an offset of 2
+ columns, for length 3).
+
+ Line 523 is offset 23 from the starting line of the ordinary line_map.
+
+ caret == start, and the offset of the finish fits within 5 bits, so
+ this can be stored as a packed range.
+
+ This is encoded as:
+ ordmap->start
+ + (line_offset << ordmap->m_column_and_range_bits)
+ + (column << ordmap->m_range_bits)
+ + (range_offset);
+ i.e. (for line offset 23, column 11, range offset 2):
+ ordmap->start
+ + (23 << 12)
+ + (11 << 5)
+ + 2;
+ i.e.:
+ ordmap->start + 0x17162
+ assuming that the line_map uses the default of 7 bits for columns and
+ 5 bits for packed range (giving 12 bits for m_column_and_range_bits).
+
+
+ "Pure" locations
+ ================
+
+ These are a special case of the above, where
+ caret == start == finish
+ They are stored as packed ranges with offset == 0.
+ For example, the location of the "f" of "foo" could be stored
+ as above, but with range offset 0, giving:
+ ordmap->start
+ + (23 << 12)
+ + (11 << 5)
+ + 0;
+ i.e.:
+ ordmap->start + 0x17160
+
+
+ Unoptimized ranges
+ ==================
+
+ Consider encoding the location of the binary expression
+ below:
+
+ 11111111112
+ 12345678901234567890
+ 522
+ 523 return foo + bar;
+ ~~~~^~~~~
+ 524
+
+ The location's caret is at the "+", line 523 column 15, but starts
+ earlier, at the "f" of "foo" at column 11. The finish is at the "r"
+ of "bar" at column 19.
+
+ This can't be stored as a packed range since start != caret.
+ Hence it is stored as an ad-hoc location e.g. 0x80000003.
+
+ Stripping off the top bit gives us an index into the ad-hoc
+ lookaside table:
+
+ line_table->location_adhoc_data_map.data[0x3]
+
+ from which the caret, start and finish can be looked up,
+ encoded as "pure" locations:
+
+ start == ordmap->start + (23 << 12) + (11 << 5)
+ == ordmap->start + 0x17160 (as above; the "f" of "foo")
+
+ caret == ordmap->start + (23 << 12) + (15 << 5)
+ == ordmap->start + 0x171e0
+
+ finish == ordmap->start + (23 << 12) + (19 << 5)
+ == ordmap->start + 0x17260
+
+ To further see how source_location works in practice, see the
+ worked example in libcpp/location-example.txt. */
typedef unsigned int source_location;
/* A range of source locations.
@@ -217,8 +346,9 @@ struct GTY((tag ("0"), desc ("%h.reason == LC_ENTER_MACRO ? 2 : 1"))) line_map {
Physical source file TO_FILE at line TO_LINE at column 0 is represented
by the logical START_LOCATION. TO_LINE+L at column C is represented by
- START_LOCATION+(L*(1<<column_bits))+C, as long as C<(1<<column_bits),
- and the result_location is less than the next line_map's start_location.
+ START_LOCATION+(L*(1<<m_column_and_range_bits))+(C*(1<<m_range_bits)), as
+ long as C<(1<<effective column bits), i.e.
+ C<(1<<(m_column_and_range_bits - m_range_bits)), and the result_location is
+ less than the next line_map's start_location.
(The top line is line 1 and the leftmost column is column 1; line/column 0
means "entire file/line" or "unknown line/column" or "not applicable".)
@@ -238,8 +368,24 @@ struct GTY((tag ("1"))) line_map_ordinary : public line_map {
cpp_buffer. */
unsigned char sysp;
- /* Number of the low-order source_location bits used for a column number. */
- unsigned int column_bits : 8;
+ /* Number of the low-order source_location bits used for column numbers
+ and ranges. */
+ unsigned int m_column_and_range_bits : 8;
+
+ /* Number of the low-order "column" bits used for storing short ranges
+ inline, rather than in the ad-hoc table.
+ MSB LSB
+ 31 0
+ +-------------------------+-------------------------------------------+
+ | |<---map->column_and_range_bits (e.g. 12)-->|
+ +-------------------------+-----------------------+-------------------+
+ | | column_and_range_bits | map->range_bits |
+ | | - range_bits | |
+ +-------------------------+-----------------------+-------------------+
+ | row bits | effective column bits | short range bits |
+ | | (e.g. 7) | (e.g. 5) |
+ +-------------------------+-----------------------+-------------------+ */
+ unsigned int m_range_bits : 8;
};
/* This is the highest possible source location encoded within an
@@ -435,15 +581,6 @@ ORDINARY_MAP_IN_SYSTEM_HEADER_P (const line_map_ordinary *ord_map)
return ord_map->sysp;
}
-/* Get the number of the low-order source_location bits used for a
- column number within ordinary map MAP. */
-
-inline unsigned char
-ORDINARY_MAP_NUMBER_OF_COLUMN_BITS (const line_map_ordinary *ord_map)
-{
- return ord_map->column_bits;
-}
-
/* Get the filename of ordinary map MAP. */
inline const char *
@@ -524,9 +661,11 @@ struct GTY(()) maps_info_macro {
unsigned int cache;
};
-/* Data structure to associate an arbitrary data to a source location. */
+/* Data structure to associate a source_range together with an arbitrary
+ data pointer with a source location. */
struct GTY(()) location_adhoc_data {
source_location locus;
+ source_range src_range;
void * GTY((skip)) data;
};
@@ -588,6 +727,12 @@ struct GTY(()) line_maps {
/* True if we've seen a #line or # 44 "file" directive. */
bool seen_line_directive;
+
+ /* The default value of range_bits in ordinary line maps. */
+ unsigned int default_range_bits;
+
+ unsigned int num_optimized_ranges;
+ unsigned int num_unoptimized_ranges;
};
/* Returns the number of allocated maps so far. MAP_KIND shall be TRUE
@@ -825,11 +970,15 @@ LINEMAPS_LAST_ALLOCATED_MACRO_MAP (const line_maps *set)
extern void location_adhoc_data_fini (struct line_maps *);
extern source_location get_combined_adhoc_loc (struct line_maps *,
- source_location, void *);
+ source_location,
+ source_range,
+ void *);
extern void *get_data_from_adhoc_loc (struct line_maps *, source_location);
extern source_location get_location_from_adhoc_loc (struct line_maps *,
source_location);
+extern source_range get_range_from_loc (line_maps *set, source_location loc);
+
/* Get whether location LOC is an ad-hoc location. */
inline bool
@@ -838,14 +987,21 @@ IS_ADHOC_LOC (source_location loc)
return (loc & MAX_SOURCE_LOCATION) != loc;
}
+/* Get whether location LOC is a "pure" location, or
+ whether it is an ad-hoc location, or embeds range information. */
+
+bool
+pure_location_p (line_maps *set, source_location loc);
+
/* Combine LOC and BLOCK, giving a combined adhoc location. */
inline source_location
COMBINE_LOCATION_DATA (struct line_maps *set,
source_location loc,
+ source_range src_range,
void *block)
{
- return get_combined_adhoc_loc (set, loc, block);
+ return get_combined_adhoc_loc (set, loc, src_range, block);
}
extern void rebuild_location_adhoc_htab (struct line_maps *);
@@ -931,7 +1087,7 @@ inline linenum_type
SOURCE_LINE (const line_map_ordinary *ord_map, source_location loc)
{
return ((loc - ord_map->start_location)
- >> ord_map->column_bits) + ord_map->to_line;
+ >> ord_map->m_column_and_range_bits) + ord_map->to_line;
}
/* Convert a map and source_location to source column number. */
@@ -939,7 +1095,7 @@ inline linenum_type
SOURCE_COLUMN (const line_map_ordinary *ord_map, source_location loc)
{
return ((loc - ord_map->start_location)
- & ((1 << ord_map->column_bits) - 1));
+ & ((1 << ord_map->m_column_and_range_bits) - 1)) >> ord_map->m_range_bits;
}
/* Return the location of the last source line within an ordinary
@@ -949,7 +1105,7 @@ LAST_SOURCE_LINE_LOCATION (const line_map_ordinary *map)
{
return (((map[1].start_location - 1
- map->start_location)
- & ~((1 << map->column_bits) - 1))
+ & ~((1 << map->m_column_and_range_bits) - 1))
+ map->start_location);
}
@@ -999,7 +1155,8 @@ linemap_position_for_column (struct line_maps *, unsigned int);
/* Encode and return a source location from a given line and
column. */
source_location
-linemap_position_for_line_and_column (const line_map_ordinary *,
+linemap_position_for_line_and_column (line_maps *set,
+ const line_map_ordinary *,
linenum_type, unsigned int);
/* Encode and return a source_location starting from location LOC and
@@ -1187,7 +1344,7 @@ class rich_location
/* Constructors. */
/* Constructing from a location. */
- rich_location (source_location loc);
+ rich_location (line_maps *set, source_location loc);
/* Constructing from a source_range. */
rich_location (source_range src_range);