summaryrefslogtreecommitdiff
path: root/include_server/compiler_defaults.py
blob: da4e05a3aa79efa5adb54c7ff721bcba712cdafe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
#! /usr/bin/env python3

# Copyright 2007 Google Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
# USA.


"""Divination of built-in system directories used by compiler installation.

It is undesirable for the distcc-pump to send header files that reside
under the built-in search path.  In a correct compiler installation,
these files must already be present on the server. This module lets
the distcc-pump run the compiler in a special mode that allows the
built-in system directories to be revealed.

The current code is tested only for gcc 4.1.1.

TODO(klarlund) Find out what other versions this code works for.
TODO(klarlund) The include server halts if the built-in system
directories cannot be determined. Should this be improved upon?
"""

__author__ = "Nils Klarlund"


import os
import re
import sys
import basics
import shutil
import subprocess

# Module-level shorthands for names provided by the project's `basics` module,
# so the code below can refer to them without the `basics.` prefix.
# Debug is called in this file as Debug(level, message).
Debug = basics.Debug
# Level passed to Debug when tracing entry into SetSystemDirsDefaults.
DEBUG_TRACE = basics.DEBUG_TRACE
# Level passed to Debug when dumping commands and discovered directories.
DEBUG_DATA = basics.DEBUG_DATA
# Raised in this file when the default search dirs cannot be determined or
# are not in the expected (realpath) form.
NotCoveredError = basics.NotCoveredError


def _RealPrefixWithinClientRoot(client_root, path):
  """Find the longest real-directory prefix of PATH beneath CLIENT_ROOT.

  PATH is an absolute path that is interpreted relative to CLIENT_ROOT
  (that is, the location inspected on disk is CLIENT_ROOT + PATH).  The
  path is walked one component at a time, starting from '/', and the walk
  stops at the first component that is a symlink or is not a directory.

  Args:
    client_root: an absolute path; the directory under which PATH is mirrored
    path: a string starting with '/'
  Returns:
    a pair consisting of
    - the longest prefix of PATH all of whose components are directories
      (neither files nor symlinks) under CLIENT_ROOT
    - a bool, which is True iff the walk was stopped by a symlink.
  """
  components = path.split('/')
  index = 0
  walked = "/"
  while walked != path:
    # Remember the last prefix known to consist of real directories only.
    confirmed = walked
    walked = os.path.join(confirmed, components[index])
    index += 1
    mirrored = client_root + walked
    if os.path.islink(mirrored):
      return confirmed, True
    if not os.path.isdir(mirrored):
      return confirmed, False
  # Every component of PATH is a real directory under CLIENT_ROOT.
  return path, False


def _MakeLinkFromMirrorToRealLocation(system_dir, client_root, system_links):
  """Create a link under client root that will resolve to system dir on server.

  See comments for CompilerDefaults class for rationale.

  Args:
    system_dir: a path such as /usr/include or
                /usr/lib/gcc/i486-linux-gnu/4.0.3/include
    client_root: a path such as /dev/shm/tmpX.include_server-X-1
    system_links: a list of paths under client_root; each denotes a symlink.
                  The list is modified in place.
  Raises:
    ValueError: if system_dir is not an absolute path.
    NotCoveredError: if system_dir is not identical to its own realpath.

  The link is created only if necessary. So,
    /usr/include/gcc/i486-linux-gnu/4.0.3/include
  is not created if
    /usr/include
  is already in place, since it's a prefix of the longer path.

  If a link is created, the symlink name will be appended to system_links.

  For example, if system_dir is '/usr/include' and client_root is
  '/dev/shm/tmpX.include_server-X-1', then this function will create a
  symlink in /dev/shm/tmpX.include_server-X-1/usr/include which points
  to ../../../../../../../../../../../../usr/include, and it will append
  '/dev/shm/tmpX.include_server-X-1/usr/include' to system_links.
  """
  if not system_dir.startswith('/'):
    raise ValueError("Expected absolute path, but got '%s'." % system_dir)
  if os.path.realpath(system_dir) != system_dir:
    raise NotCoveredError(
        "Default compiler search path '%s' must be a realpath." % system_dir)
  # Typical values for rooted_system_dir:
  #  /dev/shm/tmpX.include_server-X-1/usr/include
  real_prefix, is_link = _RealPrefixWithinClientRoot(client_root, system_dir)
  parent = os.path.dirname(system_dir)
  rooted_system_dir = client_root + system_dir
  rooted_parent = client_root + parent
  if real_prefix == system_dir:
    # rooted_system_dir already exists as a real (non-symlink) path.
    # Make rooted_system_dir a link.
    #
    # For example, this could happen if /usr/include/c++/4.0 and
    # /usr/include are both default system directories.
    # First we'd call this function with /usr/include/c++/4.0,
    # and it would call os.makedirs() to create
    # /dev/shm/tmpX.include_server-X-1/usr/include/c++,
    # and then it would create a symlink named 4.0 within that.
    # Then we'd call this function again with /usr/include.
    # In this case, we can replace the whole subtree with a single symlink
    # at /dev/shm/tmpX.include_server-X-1/usr/include.
    shutil.rmtree(rooted_system_dir)
    # Forget only the links that lived inside the removed subtree.  The
    # comparison is made on a path-component boundary so that a sibling such
    # as .../usr/include-fixed, which still exists on disk, is not dropped
    # merely because its name shares a string prefix with .../usr/include.
    subtree_prefix = rooted_system_dir.rstrip('/') + '/'
    system_links[:] = [link for link in system_links
                       if link != rooted_system_dir
                       and not link.startswith(subtree_prefix)]
  elif real_prefix == parent:
    # The really constructed path does not extend beyond the parent directory,
    # so we're all set to create the link if it's not already there.
    if os.path.exists(rooted_system_dir):
      assert os.path.islink(rooted_system_dir)
      return
  elif not is_link:
    # The mirror does not yet reach the parent directory; build it.
    os.makedirs(rooted_parent)
  else:
    # A link above real_prefix has already been created with this routine.
    return
  assert _RealPrefixWithinClientRoot(client_root, parent) == (parent, False), (client_root, parent)
  depth = system_dir.count('/')
  # The more directories on the path system_dir, the more '../' need to
  # appended. We add enough '../' to get to the root directory. It's OK
  # if we have too many, since '..' in the root directory points back to
  # the root directory.
  # TODO(klarlund,fergus): do this in a more principled way.
  # This probably requires changing the protocol.
  os.symlink('../' * (basics.MAX_COMPONENTS_IN_SERVER_ROOT + depth)
             + system_dir[1:],  # remove leading '/'
             rooted_system_dir)
  system_links.append(rooted_system_dir)


def _SystemSearchdirsGCC(compiler, sysroot, language, canonical_lookup):
  """Run gcc on empty file; parse output to figure out default paths.

  This function works only for gcc, and only some versions at that.

  Arguments:
    compiler: a filepath (the first argument on the distcc command line)
    sysroot: the --sysroot passed to the compiler ("" to disable)
    language: 'c' or 'c++' or other item in basics.LANGUAGES
    canonical_lookup: a function that maps strings to their realpaths
  Returns:
    list of system search dirs for this compiler and language, each passed
    through canonical_lookup, in the order the compiler printed them
  Raises:
    NotCoveredError: if the compiler cannot be executed, exits with a
      non-zero status, or prints output that does not contain the expected
      search list.
  """

  # We are trying to wring the following kind of text out of the
  # compiler:
  #--------------------
  # blah. blah.
  # ...
  # blah. blah.
  # #include "..." search starts here:
  # #include <...> search starts here:
  #  /usr/local/include
  #  /usr/lib/gcc/i486-linux-gnu/4.0.3/include
  #  /usr/include
  # End of search list.
  # blah. blah.
  #------------

  command = [compiler]
  if sysroot:
    command += ["--sysroot=" + sysroot]
  command += ["-x", language, "-v", "-c", "/dev/null", "-o", "/dev/null"]
  Debug(DEBUG_DATA, "system search dirs command: %s" % command)

  try:
    # We clear the environment, because otherwise, directories
    # declared by CPATH, for example, will be incorporated into the
    # result. (See the CPP manual for the meaning of CPATH.)  The only
    # thing we keep is PATH, so we can be sure to find the compiler.
    # NOTE: having the full PATH can be tricky: what if there's a gcc
    # -> distcc symlink somewhere on the PATH, before the real gcc?
    # We think the right thing will happen here, but it's complicated.
    # TODO(csilvers): it's possible we could need to pass in some
    # other environment vars, like LD_LIBRARY_PATH.  Instead of adding
    # in more env-vars by hand, consider just removing from os.environ
    # all the env-vars that are meaningful to gcc, such as CPATH.  See
    # http://docs.freebsd.org/info/gcc/gcc.info.Environment_Variables.html,
    # or the "Environment Variables Affecting GCC" section of the gcc
    # info page.
    if 'PATH' in os.environ:
      trimmed_env = {'PATH': os.environ['PATH']}
    else:
      trimmed_env = {}
    # stderr is folded into stdout because the text we parse below is read
    # from the single combined stream.
    p = subprocess.Popen(command,
                         shell=False,
                         stdin=None,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         env=trimmed_env,universal_newlines=True)
    out = p.communicate()[0]
  except (IOError, OSError) as why:
    # Chain the original error so the underlying cause stays in the traceback.
    raise NotCoveredError (
             ( "Couldn't determine default system include directories\n"
             + "for compiler '%s', language '%s':\n"
             + "error executing '%s': %s.")
             % (compiler, language, command, why)) from why

  if p.returncode != 0:
    raise NotCoveredError(
             ( "Couldn't determine default system include directories\n"
             + "for compiler '%s', language '%s':\n"
             + "command '%s' exited with status '%d'.\n Command output:\n%s") %
             (compiler, language, command, p.returncode, out))

  # The start marker is used as a regular expression as-is: the unescaped
  # dots in '<...>' match any character, which includes the literal dots.
  # DOTALL lets the non-greedy group span the newline-separated directory
  # lines between the two markers.
  match_obj = re.search(
    "%s\n(.*?)\n%s"
    % ("#include <...> search starts here:", "End of search list"),
    out,
    re.MULTILINE | re.DOTALL)
  if match_obj is None:
    raise NotCoveredError(
             ( "Couldn't determine default system include directories\n"
             + "for compiler '%s', language '%s':\n"
             + "couldn't parse output of '%s'.\nReceived:\n%s") %
             (compiler, language, command, out))

  search_dirs = []
  for line in match_obj.group(1).split("\n"):
    # Ignore Apple-modified MacOS gcc's "framework" directories.
    if line.endswith(" (framework directory)"):
      continue
    # Each line holds exactly one directory, indented by a space.  Strip the
    # surrounding whitespace rather than splitting on it, so that a directory
    # whose name contains a space is kept as a single entry.
    directory = line.strip()
    if directory:
      search_dirs.append(canonical_lookup(directory))
  return search_dirs
  # TODO: Rather than just ignoring framework directories, we
  # should handle them properly, fully emulating the search
  # algorithm used by Apple's modified GCC.
  # The search algorithm used for framework directories is not very
  # well documented, as far as I can tell, but the source code is in
  # gcc/config/darwin-c.c in the Apple GCC sources.
  # From a quick glance, I think it looks like this:
  # - For each #include of the form Foo/bar.h,
  #        For each framework directory Baz,
  #            Look in Baz/Foo.framework/Headers/bar.h
  #            and in Baz/Foo.framework/PrivateHeaders/bar.h
  # - If the regular search fails, look for subframeworks.
  #     For each #include of the form Foo/bar.h
  #       from Baz/Quux.framework/Headers/whatever.h
  #            Look in Baz/Quux.framework/Frameworks/Foo/Headers/bar.h.


class CompilerDefaults(object):
  """Caches compiler default search dirs and builds the matching symlink farm.

  This class works only for gcc, and only some versions at that, because
  the default search dirs are determined by parsing gcc's output.

  The 'default' searchdirs are the built-in entries of the search path, i.e.
  those known to the preprocessor itself, as opposed to directories added on
  the command line via -I et al.

  When an option such as -I/foo/bar is passed to the server, the server
  rewrites it to -I/server/path/root/foo/bar, where /server/path/root is the
  temporary directory on the server that corresponds to root on the client
  (e.g. typically /dev/shm/distccd_nnn).  That is a problem for options such
  as -I/usr/include/foo, whose path has a 'default' search directory (here
  /usr/include) as a prefix.  Header files under the system default
  directories are assumed to exist on the server, and it would be expensive
  to send them to the server unnecessarily (we measured it, and it slowed
  down the build of Samba by 20%).  So for -I/usr/include/foo the server
  should use /usr/include/foo, not /server/path/root/usr/include/foo.

  Because the server unconditionally rewrites include search paths on the
  command line to be relative to the server root, corrective action is
  needed when default system dirs are identified: references to files under
  these relocated system directories must be redirected to the absolute
  location where they're actually found.

  To that end a symlink forest is created under client_root, containing
  symlinks of the form

    usr/include -> ../../../../../../../../../../../../usr/include

  After being sent to the server, the server will rewrite them as

    /server/path/root/usr/include ->
       /server/path/root/../../../../../../../../../../../../usr/include

  which will make

     /server/path/root/usr/include

  become a symlink to

     /usr/include

  Consequently, an include search directory such as -I /usr/include/foo will
  work on the server, even after it has been rewritten to:

    -I /server/path/root/usr/include/foo
  """

  def __init__(self, canonical_lookup, client_root):
    """Constructor; starts with nothing cached and no links created.

    Arguments:
      canonical_lookup: a function handed to _SystemSearchdirsGCC, which
        applies it to every search dir it discovers
      client_root: a path such as /dev/shm/tmpX.include_server-X-1

    Instance variables:
      canonical_lookup: the function given above
      client_root: the path given above
      system_dirs_default: a nested dictionary such that
        system_dirs_default[c][sysroot][lang] is the list of directory
        paths (strings) found for compiler c, that sysroot, and language lang
      system_dirs_default_all: the set of all such strings, for all c,
        sysroot and lang seen so far
      system_links: locations under client_root representing system default dirs
    """
    self.client_root = client_root
    self.canonical_lookup = canonical_lookup
    self.system_links = []
    self.system_dirs_default = {}
    self.system_dirs_default_all = set()

  def SetSystemDirsDefaults(self, compiler, sysroot, language, timer=None):
    """Set instance variables according to compiler, and make symlink farm.

    Does the work only the first time a given (compiler, sysroot, language)
    combination is seen; later calls with the same combination return at once.

    Arguments:
      compiler: a filepath (the first argument on the distcc command line)
      sysroot: the --sysroot passed to the compiler ("" to disable)
      language: 'c' or 'c++' or other item in basics.LANGUAGES
      timer: a basics.IncludeAnalyzerTimer or None

    The timer will be disabled during this routine because the select involved
    in Popen calls does not handle SIGALRM.  It is started again on the way
    out, whether or not the lookup succeeded.

    See also the class documentation for this class.
    """
    assert isinstance(compiler, str)
    assert isinstance(language, str)
    Debug(DEBUG_TRACE,
          "SetSystemDirsDefaults with CC, SYSROOT, LANG: %s, %s, %s" %
          (compiler, sysroot, language))
    # Fetch the per-language table for this compiler and sysroot, creating
    # the intermediate dictionaries on first use.
    per_sysroot = self.system_dirs_default.setdefault(compiler, {})
    per_language = per_sysroot.setdefault(sysroot, {})
    if language in per_language:
      # Already determined on an earlier call.
      return
    try:
      if timer:
        # We have to disable the timer because the select system call that is
        # executed when calling the compiler through Popen gives up if presented
        # with a SIGALRM.
        timer.Stop()
      search_dirs = _SystemSearchdirsGCC(compiler, sysroot, language,
                                         self.canonical_lookup)
      per_language[language] = search_dirs
      Debug(DEBUG_DATA,
            "system_dirs_default[%s][%s][%s]: %s" %
            (compiler, sysroot, language, search_dirs))
      # Now summarize what we know and add to system_dirs_default_all.
      self.system_dirs_default_all.update(search_dirs)
      # Construct the symlink farm for the compiler default dirs.
      for system_dir in search_dirs:
        _MakeLinkFromMirrorToRealLocation(system_dir, self.client_root,
                                          self.system_links)
    finally:
      if timer:
        timer.Start()