summaryrefslogtreecommitdiff
path: root/gcc/c-family/c-indentation.c
blob: 544b0d43739666eaf692fd1afa9a417903385e7d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
/* Implementation of -Wmisleading-indentation
   Copyright (C) 2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "alias.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "c-common.h"
#include "c-indentation.h"

extern cpp_options *cpp_opts;

/* Convert libcpp's notion of a column (a 1-based char count) to
   the "visual column" (0-based column, respecting tabs), by reading the
   relevant line.

   EXPLOC supplies the file, line and 1-based column to convert.

   Returns true if a conversion was possible, writing the result to OUT,
   otherwise returns false and leaves OUT untouched.  The conversion is
   treated as impossible when the source line cannot be obtained, when
   EXPLOC.column is less than 1 (not a valid 1-based column), or when
   EXPLOC.column lies further than one past the end of the text that
   was read for the line.  */

static bool
get_visual_column (expanded_location exploc, unsigned int *out)
{
  int line_len;
  const char *line = location_get_source_line (exploc, &line_len);
  if (!line)
    return false;

  /* The loop below reads line[0] through line[exploc.column - 2].
     Refuse any column for which that would step outside the LINE_LEN
     characters reported for this line, rather than reading past the
     end of the buffer.  */
  if (exploc.column < 1 || exploc.column - 1 > line_len)
    return false;

  unsigned int vis_column = 0;
  for (int i = 1; i < exploc.column; i++)
    {
      unsigned char ch = line[i - 1];
      if (ch == '\t')
	{
	  /* Round up to nearest tab stop.  Adding a full TAB_WIDTH before
	     truncating guarantees that a tab always advances, even when
	     VIS_COLUMN is already sitting on a tab stop.  */
	  const unsigned int tab_width = cpp_opts->tabstop;
	  vis_column = ((vis_column + tab_width) / tab_width) * tab_width;
	}
      else
	vis_column++;
    }

  *out = vis_column;
  return true;
}

/* Is the token at EXPLOC the first non-whitespace on its line?
   Helper function for should_warn_for_misleading_indentation.

   Returns true only if every character before EXPLOC.column on the
   line is whitespace.  Returns false if any earlier character is not
   whitespace, and also whenever the question cannot be answered (see
   below), so that callers err on the side of not warning.  */

static bool
is_first_nonwhitespace_on_line (expanded_location exploc)
{
  int line_len;
  const char *line = location_get_source_line (exploc, &line_len);

   /* If we can't determine it, return false so that we don't issue a
      warning.  This is sometimes the case for input files
      containing #line directives, and these are often for autogenerated
      sources (e.g. from .md files), where it's not clear that it's
      meaningful to look at indentation.  */
  if (!line)
    return false;

  /* Likewise if the column is not a valid 1-based position within the
     text we were given: the loop below reads line[0] through
     line[exploc.column - 2], which must stay inside the LINE_LEN
     characters reported for this line.  */
  if (exploc.column < 1 || exploc.column - 1 > line_len)
    return false;

  for (int i = 1; i < exploc.column; i++)
    {
      unsigned char ch = line[i - 1];
      if (!ISSPACE (ch))
	return false;
    }
  return true;
}

/* Report whether line LINE_NUM of FILE looks like the start of a
   conditional preprocessor block: optional whitespace, a '#', optional
   whitespace, then the letters "if" (which covers #if, #ifdef and
   #ifndef alike).  No attempt is made to notice that the text might
   sit inside a comment.

   Returns false if the line cannot be read.
   Helper function for detect_preprocessor_logic.  */

static bool
line_contains_hash_if (const char *file, int line_num)
{
  /* Build a location naming the start of the requested line.  */
  expanded_location exploc;
  exploc.file = file;
  exploc.line = line_num;
  exploc.column = 1;

  int line_len;
  const char *line = location_get_source_line (exploc, &line_len);
  if (!line)
    return false;

  int pos = 0;

  /* Step over any indentation.  */
  while (pos < line_len && ISSPACE (line[pos]))
    pos++;

  /* The first visible character has to be '#'; a blank line fails
     here too.  */
  if (pos == line_len || line[pos] != '#')
    return false;
  pos++;

  /* Whitespace is permitted between the '#' and the directive name.  */
  while (pos < line_len && ISSPACE (line[pos]))
    pos++;

  /* At least two characters must remain, and they must spell "if".  */
  return (pos + 2 <= line_len
	  && line[pos] == 'i'
	  && line[pos + 1] == 'f');
}


/* Look for conditional preprocessor directives on the lines strictly
   between BODY_EXPLOC and NEXT_STMT_EXPLOC.  This lets the caller stay
   quiet for code such as:

	if (flagA)
	  foo ();
	  ^ BODY_EXPLOC
      #if SOME_CONDITION_THAT_DOES_NOT_HOLD
	if (flagB)
      #endif
	  bar ();
	  ^ NEXT_STMT_EXPLOC

   where "bar ();" lines up under "foo ();" and is the next token the
   parser sees, yet the layout is not actually misleading.

   Both locations must be in the same file, with NEXT_STMT_EXPLOC on a
   later line than BODY_EXPLOC (this is asserted).

   Returns true if a #if/#ifdef/#ifndef line is found, false otherwise.  */

static bool
detect_preprocessor_logic (expanded_location body_exploc,
			   expanded_location next_stmt_exploc)
{
  gcc_assert (next_stmt_exploc.file == body_exploc.file);
  gcc_assert (next_stmt_exploc.line > body_exploc.line);

  /* Locations fewer than four lines apart are never scanned.
     NOTE(review): presumably because a "#if" / guarded line / "#endif"
     sequence needs at least three intervening lines -- confirm.  */
  const int line_gap = next_stmt_exploc.line - body_exploc.line;
  if (line_gap < 4)
    return false;

  /* Scan the intervening lines for anything resembling #if, #ifdef or
     #ifndef.  Peeking at raw source text from here is a layering
     violation, but the check cannot be done any other way; a "#if"
     inside a comment could in principle fool us, which is unlikely to
     matter in practice.  */
  int candidate = body_exploc.line + 1;
  while (candidate < next_stmt_exploc.line)
    {
      if (line_contains_hash_if (body_exploc.file, candidate))
	return true;
      candidate++;
    }

  /* No directive seen.  */
  return false;
}


/* Helper function for warn_for_misleading_indentation; see
   description of that function below.

   GUARD_TINFO, BODY_TINFO and NEXT_TINFO describe the guarding token,
   the guarded body, and the token that follows the body, respectively.
   Returns true if a warning should be issued at NEXT_TINFO's location,
   false otherwise.  */

static bool
should_warn_for_misleading_indentation (const token_indent_info &guard_tinfo,
					const token_indent_info &body_tinfo,
					const token_indent_info &next_tinfo)
{
  location_t guard_loc = guard_tinfo.location;
  location_t body_loc = body_tinfo.location;
  location_t next_stmt_loc = next_tinfo.location;

  enum cpp_ttype next_tok_type = next_tinfo.type;

  /* Don't attempt to compare the indentation of BODY_LOC and NEXT_STMT_LOC
     if either are within macros.  */
  if (linemap_location_from_macro_expansion_p (line_table, body_loc)
      || linemap_location_from_macro_expansion_p (line_table, next_stmt_loc))
    return false;

  /* Don't attempt to compare indentation if #line or # 44 "file"-style
     directives are present, suggesting generated code.

     All bets are off if these are present: the file that the #line
     directive refers to could have an entirely different coding layout
     to C/C++ (e.g. .md files).

     To determine if a #line is present, in theory we could look for a
     map with reason == LC_RENAME_VERBATIM.  However, if there has
     subsequently been a long line requiring a column number larger than
     that representable by the original LC_RENAME_VERBATIM map, then
     we'll have a map with reason LC_RENAME.
     Rather than attempting to search all of the maps for a
     LC_RENAME_VERBATIM, instead we have libcpp set a flag whenever one
     is seen, and we check for the flag here.
  */
  if (line_table->seen_line_directive)
    return false;

  /* If the token following the body is a close brace or an "else"
     then while indentation may be sloppy, there is not much ambiguity
     about control flow, e.g.

     if (foo)       <- GUARD
       bar ();      <- BODY
       else baz (); <- NEXT

     {
     while (foo)  <- GUARD
     bar ();      <- BODY
     }            <- NEXT
     baz ();
  */
  if (next_tok_type == CPP_CLOSE_BRACE
      || next_tinfo.keyword == RID_ELSE)
    return false;

  /* Don't warn here about spurious semicolons.  */
  if (next_tok_type == CPP_SEMICOLON)
    return false;

  expanded_location body_exploc = expand_location (body_loc);
  expanded_location next_stmt_exploc = expand_location (next_stmt_loc);

  /* They must be in the same file.  */
  if (next_stmt_exploc.file != body_exploc.file)
    return false;

  /* If NEXT_STMT_LOC and BODY_LOC are on the same line, consider
     the location of the guard.

     Cases where we want to issue a warning:

       if (flag)
         foo ();  bar ();
                  ^ WARN HERE

       if (flag) foo (); bar ();
                         ^ WARN HERE

     Cases where we don't want to issue a warning:

       various_code (); if (flag) foo (); bar (); more_code ();
                                          ^ DON'T WARN HERE.  */
  if (next_stmt_exploc.line == body_exploc.line)
    {
      expanded_location guard_exploc = expand_location (guard_loc);
      /* NOTE(review): a guard that expands to a different file than the
	 body results in a warning here -- confirm this is intended.  */
      if (guard_exploc.file != body_exploc.file)
	return true;
      if (guard_exploc.line < body_exploc.line)
	/* The guard is on a line before a line that contains both
	   the body and the next stmt.  */
	return true;
      else if (guard_exploc.line == body_exploc.line)
	{
	  /* They're all on the same line.  */
	  gcc_assert (guard_exploc.file == next_stmt_exploc.file);
	  gcc_assert (guard_exploc.line == next_stmt_exploc.line);
	  /* Heuristic: only warn if the guard is the first thing
	     on its line.  */
	  if (is_first_nonwhitespace_on_line (guard_exploc))
	    return true;
	}
      /* Anything not accepted above falls through; the later-line check
	 below cannot match when the lines are equal, so the result is
	 the final "return false".  */
    }

  /* If NEXT_STMT_LOC is on a line after BODY_LOC, consider
     their relative locations, and of the guard.

     Cases where we want to issue a warning:
        if (flag)
          foo ();
          bar ();
          ^ WARN HERE

     Cases where we don't want to issue a warning:
        if (flag)
        foo ();
        bar ();
        ^ DON'T WARN HERE (autogenerated code?)

	if (flagA)
	  foo ();
      #if SOME_CONDITION_THAT_DOES_NOT_HOLD
	if (flagB)
      #endif
	  bar ();
	  ^ DON'T WARN HERE

        if (flag) {
          foo ();
        } else
        {
          bar ();
        }
        baz ();
        ^ DON'T WARN HERE
  */
  if (next_stmt_exploc.line > body_exploc.line)
    {
      /* Determine if BODY_LOC and NEXT_STMT_LOC are aligned on the same
	 "visual column"...  */
      unsigned int next_stmt_vis_column;
      unsigned int body_vis_column;
      /* If we can't determine it, don't issue a warning.  This is sometimes
	 the case for input files containing #line directives, and these
	 are often for autogenerated sources (e.g. from .md files), where
	 it's not clear that it's meaningful to look at indentation.  */
      if (!get_visual_column (next_stmt_exploc, &next_stmt_vis_column))
	return false;
      if (!get_visual_column (body_exploc, &body_vis_column))
	return false;
      if (next_stmt_vis_column == body_vis_column)
	{
	  /* Don't warn if they are aligned on the same column
	     as the guard itself (suggesting autogenerated code that
	     doesn't bother indenting at all).  */
	  expanded_location guard_exploc = expand_location (guard_loc);
	  unsigned int guard_vis_column;
	  if (!get_visual_column (guard_exploc, &guard_vis_column))
	    return false;
	  if (guard_vis_column == body_vis_column)
	    return false;

	  /* PR 66220: Don't warn if the guarding statement is more
	     indented than the next/body stmts.  */
	  if (guard_vis_column > body_vis_column)
	    return false;

	  /* Don't warn if there is multiline preprocessor logic between
	     the two statements. */
	  if (detect_preprocessor_logic (body_exploc, next_stmt_exploc))
	    return false;

	  /* Otherwise, they are visually aligned: issue a warning.  */
	  return true;
	}
    }

  /* No misleading layout was recognized.  */
  return false;
}

/* Map the keyword recorded in GUARD_TINFO to the spelling used when
   naming the clause in a diagnostic.  Only "if", "else", "for",
   "while" and "do" guards are expected; any other keyword hits
   gcc_unreachable.  */

static const char *
guard_tinfo_to_string (const token_indent_info &guard_tinfo)
{
  const char *spelling;

  switch (guard_tinfo.keyword)
    {
    case RID_IF:
      spelling = "if";
      break;
    case RID_ELSE:
      spelling = "else";
      break;
    case RID_FOR:
      spelling = "for";
      break;
    case RID_WHILE:
      spelling = "while";
      break;
    case RID_DO:
      spelling = "do";
      break;
    default:
      gcc_unreachable ();
    }

  return spelling;
}

/* Called by the C/C++ frontends when a guarding statement controls a
   body that was written without braces, like this:

     if (flag)
       foo ();

   GUARD_TINFO describes the guarding token, BODY_TINFO the guarded
   body, and NEXT_TINFO the token that follows the body.  Together they
   allow us to detect a followup statement that sits in the same
   "visual block" as the guarded statement without being logically
   grouped under the guard, such as:

     GUARD_TINFO
     |
     V
     if (flag)
       foo (); <- BODY_TINFO
       bar (); <- NEXT_TINFO

   In the above, "bar ();" isn't guarded by the "if", but
   is indented to misleadingly suggest that it is in the same
   block as "foo ();".

   When such a layout is found, a warning is issued at NEXT_TINFO's
   location and, if that warning was emitted, a note at GUARD_TINFO's
   location names the kind of clause ("if", "else", etc.).  */

void
warn_for_misleading_indentation (const token_indent_info &guard_tinfo,
				 const token_indent_info &body_tinfo,
				 const token_indent_info &next_tinfo)
{
  /* Bail out immediately when -Wmisleading-indentation is off, instead
     of doing the analysis only for the diagnostic machinery to discard
     the result.  */
  if (!warn_misleading_indentation)
    return;

  /* Nothing to report unless the layout heuristics say so.  */
  if (!should_warn_for_misleading_indentation (guard_tinfo,
					       body_tinfo,
					       next_tinfo))
    return;

  /* The follow-up note is only worthwhile if the warning itself was
     actually emitted.  */
  const bool warned
    = warning_at (next_tinfo.location, OPT_Wmisleading_indentation,
		  "statement is indented as if it were guarded by...");
  if (warned)
    inform (guard_tinfo.location,
	    "...this %qs clause, but it is not",
	    guard_tinfo_to_string (guard_tinfo));
}