summaryrefslogtreecommitdiff
path: root/include_server/include_analyzer.py
blob: 450b593a049b2ac71990c412981b55d6adddfa09 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
#! /usr/bin/python2.4

# Copyright 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
# USA.
 
"""The skeleton for an include analyzer.

This module defines the basic caches and helper functions for an
include analyzer.
"""

__author__ = "Nils Klarlund"

import os
import glob

import basics
import macro_eval
import parse_file
import parse_command
import statistics
import cache_basics
import mirror_path
import compress_files

# Module-level shorthands for names that live in the basics module, so the
# code below can refer to them without the "basics." prefix.
Debug = basics.Debug
DEBUG_TRACE = basics.DEBUG_TRACE
NotCoveredError = basics.NotCoveredError

class IncludeAnalyzer(object):
  """The skeleton, including caches, of an include analyzer."""

  def _InitializeAllCaches(self):
    """(Re)create every cache and helper object owned by this analyzer.

    This is called from the constructor and again from ClearStatCaches, so
    everything assigned here is replaced wholesale on each call.
    """
    # Cache for parsed files, and table for symbols in #define's.
    self.file_cache = {}
    self.symbol_table = {}

    # Build the bundle of caches for the current client root and keep a
    # reference to the bundle itself.
    client_root = self.client_root_keeper.client_root
    self.caches = cache_basics.SetUpCaches(client_root)
    bundle = self.caches

    # Re-export the individual caches and predicates under our own namespace.
    for attribute_name in ("includepath_map",
                           "directory_map",
                           "realpath_map",
                           "canonical_path",
                           "dirname_cache",
                           "compiler_defaults",
                           "systemdir_prefix_cache",
                           "simple_build_stat",
                           "build_stat_cache",
                           "IsIncludepathIndex",
                           "IsSearchdirIndex",
                           "IsCurrdirIndex",
                           "IsRealpathIndex",
                           "IsFilepathPair"):
      setattr(self, attribute_name, getattr(bundle, attribute_name))

    # Cache for the symbolic links encountered, and for their replication
    # into the root directory.
    self.mirror_path = mirror_path.MirrorPath(
        self.simple_build_stat,
        self.canonical_path,
        self.realpath_map,
        self.systemdir_prefix_cache)
    # Parser for C/C++.
    self.parse_file = parse_file.ParseFile(self.includepath_map)
    # Compressor for source files.
    self.compress_files = compress_files.CompressFiles(
        self.includepath_map,
        self.directory_map,
        self.realpath_map,
        self.mirror_path)
    # A fast cache for avoiding calls into the mirror_path object.
    self.mirrored = set()

    # For statistics only: every distinct search list (search path) seen is
    # accumulated in one of these sets.
    self.quote_dirs_set = set()      # quote search lists
    self.angle_dirs_set = set()      # angle search lists
    self.include_dir_pairs = set()   # (quote search list, angle search list)

  def __init__(self, client_root_keeper, stat_reset_triggers={}):
    self.generation = 1
    self.client_root_keeper = client_root_keeper
    self.client_root_keeper.ClientRootMakedir(self.generation)
    self.stat_reset_triggers = stat_reset_triggers
    self.translation_unit = "unknown translation unit"
    self.timer = None
    self.include_server_cwd = os.getcwd()
    self._InitializeAllCaches()

  def _ProcessFileFromCommandLine(self, fpath, currdir, kind, search_list):
    """Return closure of fpath whose kind is "translation unit" or "include".
       Such files come from the command line, either as the file to compile,
       or from a "-include" command line option.
    Arguments:
      fpath: a filepath (as a string)
      currdir: a string
      kind: a string used for an error message if fpath is not found
      search_list: a tuple of directory indices (for "include" kind files)
    Returns:
      an include closure calculated by RunAlgorithm
    """
    # We allow the filepath to be absolute. We do not tolerate absolute
    # includepaths, in general, and so must be careful here, because we use
    # build_stat_cache.Resolve. We prepare to use the searchdir parameter of
    # Resolve.
    if os.path.isabs(fpath):
      file_dirpath, file_filename = os.path.split(fpath)
    else:
      # Use empty string as directory name (offset from currdir)
      file_dirpath, file_filename = "", fpath
    fpath_resolved_pair, fpath_real = self.build_stat_cache.Resolve(
      self.includepath_map.Index(file_filename),
      self.currdir_idx,
      self.directory_map.Index(file_dirpath),
      search_list)
    if fpath_resolved_pair == None:
      raise NotCoveredError("Could not find %s '%s'." % (kind, fpath),
                            send_email=False)
    # We must inspect the path to replicate directories and symlinks.
    self.mirror_path.DoPath(
        os.path.join(currdir, fpath),
        self.currdir_idx,
        self.client_root_keeper.client_root)
    
    closure = self.RunAlgorithm(fpath_resolved_pair, fpath_real)
    return closure

  def ProcessCompilationCommand(self, currdir, parsed_command):
    """Compute the include closure for an already parsed command.

    Precondition:
      currdir == os.getcwd()

    Arguments:
      currdir: a string denoting an absolute filepath when command is run
      parsed_command: the value returned by ParseCommandArgs, a 6-tuple
        (quote dirs, angle dirs, include files, translation unit,
        result file prefix, -D options)

    Returns:
      an include closure as described in RunAlgorithm; it is the union of
      the closures of all "-include" files and of the translation unit
    """

    Debug(DEBUG_TRACE,
          "ProcessCompilationCommand: %s, %s" % (currdir, parsed_command))

    assert isinstance(currdir, str)
    statistics.parse_file_counter_last = statistics.parse_file_counter

    # Take the parsed command apart and record its pieces on self.
    (quote_dirs, angle_dirs, include_files, translation_unit,
     result_file_prefix, d_opts) = parsed_command
    self.quote_dirs = quote_dirs
    self.angle_dirs = angle_dirs
    self.include_files = include_files
    self.result_file_prefix = result_file_prefix
    self.d_opts = d_opts

    statistics.translation_unit = translation_unit
    self.translation_unit = translation_unit

    self.currdir_idx = self.directory_map.Index(currdir)

    # Statistics only.
    self.include_dir_pairs.add((self.quote_dirs, self.angle_dirs))
    self.quote_dirs_set.add(self.quote_dirs)
    self.angle_dirs_set.add(self.angle_dirs)
    statistics.quote_path_total += len(self.quote_dirs)
    statistics.angle_path_total += len(self.angle_dirs)

    # Files named by "-include" options come first and are searched for
    # along the quote search list; the translation unit itself comes last
    # and uses no search list.
    closure_union = {}
    for include_idx in self.include_files:
      include_path = self.includepath_map.string[include_idx]
      include_closure = self._ProcessFileFromCommandLine(
          include_path, currdir, "include file", self.quote_dirs)
      closure_union.update(include_closure)
    unit_closure = self._ProcessFileFromCommandLine(
        translation_unit, currdir, "translation unit", ())
    closure_union.update(unit_closure)
    return closure_union

  def DoStatResetTriggers(self):
    """Reset stat caches if a glob evaluates differently from earlier.

    More precisely, if a path of a glob comes in or out of existence or has a
    new stamp, then reset stat caches.

    self.stat_reset_triggers maps each glob expression to a dictionary from
    the paths it matched to their stamps.  The dictionary is updated in
    place so that a given change triggers a reset only once.  At most one
    reset is performed per call; for a changed or newly appeared path the
    method returns right after handling the first one it finds.
    """

    trigger_map = self.stat_reset_triggers
    # Start with every previously recorded path; each path that still
    # matches its glob is struck off below, so what remains has vanished.
    old_paths = [ path
                  for glob_expr in trigger_map
                  for path in trigger_map[glob_expr] ]
    for glob_expr in trigger_map:
      known_stamps = trigger_map[glob_expr]
      for path in glob.glob(glob_expr):
        try:
          old_paths.remove(path)
        except ValueError:
          pass
        new_stamp = basics.Stamp(path)
        if path in known_stamps:
          if new_stamp != known_stamps[path]:
            Debug(basics.DEBUG_WARNING,
                  "Path '%s' changed. Clearing caches.",
                  path)
            known_stamps[path] = new_stamp
            self.ClearStatCaches()
            return
        else:
          Debug(basics.DEBUG_WARNING,
                "Path '%s' came into existence. Clearing caches.",
                path)
          known_stamps[path] = new_stamp
          self.ClearStatCaches()
          return
    if old_paths:
      path = old_paths[0]
      Debug(basics.DEBUG_WARNING,
            "Path '%s' no longer exists. Clearing caches.",
            path)
      # Forget the vanished paths.  If they stayed recorded, every later
      # call would find them missing again and clear the caches each time.
      for known_stamps in trigger_map.values():
        for vanished_path in old_paths:
          known_stamps.pop(vanished_path, None)
      self.ClearStatCaches()

  def DoCompilationCommand(self, cmd, currdir, client_root_keeper):
    """Parse and process the command; then gather files and links.

    As a side effect the process's working directory is changed, first to
    the include server's original directory and then to currdir.

    Arguments:
      cmd: the compilation command as a sequence of strings (it is joined
        with spaces when the real preprocessor is invoked for verification)
      currdir: the distcc client's working directory
      client_root_keeper: passed on to the source file compressor
    Returns:
      the compressed files followed by the links and, unless
      basics.opt_no_force_dirs is set, the directory-forcing dummy files
    """

    self.translation_unit = "unknown translation unit"  # don't know yet

    # Any relative paths in the globs in the --stat_reset_trigger argument
    # must be evaluated relative to the include server's original working
    # directory.
    os.chdir(self.include_server_cwd)
    self.DoStatResetTriggers()

    # Now change to the distcc client's working directory.
    # That'll let us use os.path.join etc without including currdir explicitly.
    os.chdir(currdir)

    parsed_command = (
        parse_command.ParseCommandArgs(cmd,
                                       currdir,
                                       self.includepath_map,
                                       self.directory_map,
                                       self.compiler_defaults,
                                       self.timer))

    # Do the real work. ProcessCompilationCommand takes parsed_command apart
    # and records the pieces (translation unit, result file prefix, ...) on
    # self, so there is no need to unpack it here.
    include_closure = (
      self.ProcessCompilationCommand(currdir, parsed_command))
    # Cancel timer before I/O in compress_files.
    if self.timer:  # timer may not always exist when testing
      self.timer.Cancel()
    # Get name of the initial source file
    translation_unit = self.translation_unit
    # Links are accumulated intra-build (across different compilations in a
    # build). We send all of 'em every time.  This will potentially lead to
    # performance degradation for large link farms. We expect at most a
    # handful. We put the system links first, because there should be very
    # few of them.
    links = self.compiler_defaults.system_links + self.mirror_path.Links()
    files = self.compress_files.Compress(include_closure, client_root_keeper,
                                         self.currdir_idx)

    files_and_links = files + links

    # Note that the performance degradation comment above applies especially
    # to forced include directories, unless disabled with --no_force_dirs
    if basics.opt_no_force_dirs == False:
      files_and_links += self._ForceDirectoriesToExist()

    realpath_map = self.realpath_map

    # NOTE(review): ExactDependencies, WriteDependencies and
    # VerifyExactDependencies are neither defined nor imported in this module
    # as far as this file shows; confirm that they are provided elsewhere
    # before relying on opt_verify or opt_write_include_closure.
    if basics.opt_verify:
      # Invoke the real preprocessor.
      exact_no_system_header_dependency_set = (
        ExactDependencies(" ".join(cmd),
                          realpath_map,
                          self.systemdir_prefix_cache,
                          translation_unit))
      if basics.opt_write_include_closure:
        WriteDependencies(exact_no_system_header_dependency_set,
                          self.result_file_prefix + '.d_exact',
                          realpath_map)
      VerifyExactDependencies(include_closure,
                              exact_no_system_header_dependency_set,
                              realpath_map,
                              translation_unit)
    if basics.opt_write_include_closure:
      WriteDependencies(include_closure,
                        self.result_file_prefix + '.d_approx',
                        realpath_map)
    return files_and_links

  def _ForceDirectoriesToExist(self):
    """Force any needed directories to exist.

    In rare cases, the source files may contain #include "foo/../bar",
    but may not contain any other files from the "foo" directory.
    In such cases, we invent a dummy file in (the mirrored copy of)
    each such directory, just to force the distccd server to create the
    directory, so that the C compiler won't get an error when it tries
    to resolve that #include.

    Returns:
      A list of files to pass as dummy inputs.
    """

    must_exist_dirs = self.mirror_path.MustExistDirs()
    # Note: distcc's --scan-includes option needs to
    # know about this name; see ../src/compile.c.
    special_name = 'forcing_technique_271828'
    forcing_files = [d + '/' + special_name
                     for d in must_exist_dirs]
    for forcing_file in forcing_files:
      # If for extremly obscure reasons the file already exists and is useful,
      # then don't change it: that's why we open in "append" mode.
      open(forcing_file, "a").close()
    return forcing_files

  def RunAlgorithm(self, filepath_resolved_pair, filepath_real_idx):
    """Run FindNode on filepath; then compute include closure.
    Arguments:
      filepath_resolved_pair: (directory_idx, includepath_idx)
      filepath_real: the realpath_map index corresponding to
        filepath_resolved_pair
    Returns:
      include_closure: a dictionary.

    The include_closure consists of entries of the form

        realpath_idx: [(searchdir_idx_1, includepath_idx_1),
                       (searchdir_idx_2, includepath_idx_2), ...]

    where searchdir_i is an absolute path.  realpath_idx is a realpath
    index corresponding to a single #include (more exactly, it's the
    index of the path that the #include resolves to).
    
    This include closure calculation omits any system header files,
    that is, header files found in a systemdir (recall systemdirs are
    those searchdirs that are built into the preprocessor, such as
    "/usr/include").  It concentrates only on header files users might
    edit.

    The keys are the most important part of the include_closure; the
    values are used only to munge the preprocessor output to give more
    useful filenames via the #line directive.  The issue here is that
    source files in the distcc system are not in their "proper"
    locations: for instance, /usr/X11R6/include/X11.h might be in
    /tmp/distcc/usr/X11R6/include/X11.h rather than in
    /usr/X11R6/include.

    As the example above suggests, relative position of .h files is
    preserved in distcc-land, so if the #include ends up being a
    relative include, we do not need to do any munging, so we don't
    bother to store anything in the value-list corresponding to
    realpath_idx.  If, however, the #include ends up being an absolute
    include, we do store the "real" name (as an index-pair) in the
    list.  For debugging purposes, we may store more than one "real"
    name if there are several, which can happen when multiple symlinks
    point to the same place.
    TODO(csilvers): change the code to only store one.

    Here's a concrete example: suppose we're trying to resolve
    #include "bar.h", and the searchdir_list is ["reldir/foo",
    "/usr/foo"].  If "<cwd>/reldir/foo/bar.h" exists, then
    realpath_idx will resolve to that, and the preprocessor will emit
    code like "#line 1 reldir/foo/bar.h".  That's correct as-is, no
    munging needed, so we don't bother to put a value in the
    include_closure entry for this realpath.

    If, however, "<cwd>/reldir/foo/bar.h" does not exist, but
    "/usr/foo/bar.h" exists, then realpath_idx will resolve to that,
    and the preprocessor will emit code like "#line 1
    /tmp/distcc/usr/foo/bar.h".  We'll want to munge that to be
    "/usr/foo/bar.h", so we do put a value in the include_closure
    entry for this realpath, to tell us what to munge to.

    (Note we *could* use realpath to tell us the "real" filename,
    without needing a separate index-pair, but that's not as
    user-friendly, since realpath is the filename after symlinks are
    resolved.  Thus, on some setups the realpath of /usr/foo/bar.h
    could be /netapp1/mnt/foo/bar.h or something equally unhelpful.)

    This method to be overridden by derived class.
    """

    raise Exception, "RunAlgorithm not implemented."

  def ClearStatCaches(self):
    """Clear caches used for, or dependent on, stats."""
    self.generation += 1
    # Tabula rasa: for this analysis, we must forget everything recorded in the
    # client_root directory about source files, directories, and symbolic links.
    # But we cannot delete any such information, because slow-poke distcc
    # clients that have received earlier include manifests perhaps only now get
    # around to reading a previous generation client root directory.
    self.client_root_keeper.ClientRootMakedir(self.generation)
    self._InitializeAllCaches()